/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 923 - (hide annotations) (download)
Tue Feb 21 13:25:05 2012 UTC (14 months, 4 weeks ago) by ph10
File MIME type: text/plain
File size: 132077 byte(s)
Make it possible for pcretest to select which JIT compile options are used.

1 ph10 922 /*.************************************************
2 nigel 3 * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 836 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39 ph10 836 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40     libraries in a single program. This is different from the modules such as
41     pcre_compile.c in the library itself, which are compiled separately for each
42     mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43     (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44     compiled only once. Therefore, it must not make use of any of the macros from
45     pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46     however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47     only supported library functions. */
48 nigel 75
49 ph10 836
50 ph10 200 #ifdef HAVE_CONFIG_H
51 ph10 236 #include "config.h"
52 ph10 200 #endif
53 ph10 199
54 nigel 3 #include <ctype.h>
55     #include <stdio.h>
56     #include <string.h>
57     #include <stdlib.h>
58     #include <time.h>
59 nigel 25 #include <locale.h>
60 nigel 75 #include <errno.h>
61 nigel 3
62 ph10 287 #ifdef SUPPORT_LIBREADLINE
63 ph10 343 #ifdef HAVE_UNISTD_H
64 ph10 287 #include <unistd.h>
65 ph10 343 #endif
66 ph10 287 #include <readline/readline.h>
67     #include <readline/history.h>
68     #endif
69 nigel 93
70 ph10 287
71 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
72     input and output without "b"; then I was told that "b" was needed in some
73     environments, so it was added for release 5.0 to both the input and output. (It
74     makes no difference on Unix-like systems.) Later I was told that it is wrong
75     for the input on Windows. I've now abstracted the modes into two macros that
76     are set here, to make it easier to fiddle with them, and removed "b" from the
77     input mode under Windows. */
78    
79     #if defined(_WIN32) || defined(WIN32)
80     #include <io.h> /* For _setmode() */
81     #include <fcntl.h> /* For _O_BINARY */
82     #define INPUT_MODE "r"
83     #define OUTPUT_MODE "wb"
84    
85 ph10 411 #ifndef isatty
86     #define isatty _isatty /* This is what Windows calls them, I'm told, */
87     #endif /* though in some environments they seem to */
88     /* be already defined, hence the #ifndefs. */
89     #ifndef fileno
90 ph10 343 #define fileno _fileno
91 ph10 411 #endif
92 ph10 343
93 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95     #ifdef __BORLANDC__
96     #define _setmode(handle, mode) setmode(handle, mode)
97     #endif
98    
99     /* Not Windows */
100    
101 nigel 93 #else
102     #include <sys/time.h> /* These two includes are needed */
103     #include <sys/resource.h> /* for setrlimit(). */
104     #define INPUT_MODE "rb"
105     #define OUTPUT_MODE "wb"
106 nigel 91 #endif
107    
108 zherczeg 905 #define PRIV(name) name
109 nigel 93
110 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
111     displaying the results of pcre_study() and we also need to know about the
112     internal macros, structures, and other internal data values; pcretest has
113     "inside information" compared to a program that strictly follows the PCRE API.
114 nigel 37
115 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
116     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
117     appropriately for an application, not for building PCRE. */
118 nigel 77
119 ph10 145 #include "pcre.h"
120 ph10 836
121     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122     /* Configure internal macros to 16 bit mode. */
123     #define COMPILE_PCRE16
124     #endif
125    
126 nigel 77 #include "pcre_internal.h"
127    
128 ph10 836 /* The pcre_printint() function, which prints the internal form of a compiled
129     regex, is held in a separate file so that (a) it can be compiled in either
130     8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131     when that is compiled in debug mode. */
132    
133     #ifdef SUPPORT_PCRE8
134     void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135     #endif
136     #ifdef SUPPORT_PCRE16
137     void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138     #endif
139    
140 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
141     to keep two copies, we include the source file here, changing the names of the
142     external symbols to prevent clashes. */
143 nigel 77
144 ph10 836 #define PCRE_INCLUDED
145 nigel 85
146     #include "pcre_tables.c"
147    
148 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
149 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
150 ph10 836 the same as in the printint.src file. We use it here in cases when the locale
151     has not been explicitly changed, so as to get consistent output from systems
152     that differ in their output from isprint() even in the "C" locale. */
153 nigel 93
154 ph10 836 #ifdef EBCDIC
155     #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
156     #else
157     #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
158     #endif
159 nigel 85
160 ph10 836 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162     /* Posix support is disabled in 16 bit only mode. */
163     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164     #define NOPOSIX
165     #endif
166    
167 nigel 37 /* It is possible to compile this test program without including support for
168     testing the POSIX interface, though this is not available via the standard
169     Makefile. */
170    
171     #if !defined NOPOSIX
172 nigel 3 #include "pcreposix.h"
173 nigel 37 #endif
174 nigel 3
175 ph10 836 /* It is also possible, originally for the benefit of a version that was
176     imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177     NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
178     automatically cut out the UTF support if PCRE is built without it. */
179 nigel 79
180 ph10 836 #ifndef SUPPORT_UTF
181     #ifndef NOUTF
182     #define NOUTF
183 ph10 107 #endif
184     #endif
185 nigel 79
186 ph10 836 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187     for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188     only from one place and is handled differently). I couldn't dream up any way of
189     using a single macro to do this in a generic way, because of the many different
190     argument requirements. We know that at least one of SUPPORT_PCRE8 and
191     SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192     use these in the definitions of generic macros.
193 ph10 107
194 ph10 836 **** Special note about the PCHARSxxx macros: the address of the string to be
195     printed is always given as two arguments: a base address followed by an offset.
196     The base address is cast to the correct data size for 8 or 16 bit data; the
197     offset is in units of this size. If the string were given as base+offset in one
198     argument, the casting might be incorrectly applied. */
199    
200     #ifdef SUPPORT_PCRE8
201    
202     #define PCHARS8(lv, p, offset, len, f) \
203     lv = pchars((pcre_uint8 *)(p) + offset, len, f)
204    
205     #define PCHARSV8(p, offset, len, f) \
206     (void)pchars((pcre_uint8 *)(p) + offset, len, f)
207    
208     #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
209     p = read_capture_name8(p, cn8, re)
210    
211 zherczeg 852 #define STRLEN8(p) ((int)strlen((char *)p))
212    
213 ph10 836 #define SET_PCRE_CALLOUT8(callout) \
214     pcre_callout = callout
215    
216 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
217     pcre_assign_jit_stack(extra, callback, userdata)
218 ph10 836
219     #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
220     re = pcre_compile((char *)pat, options, error, erroffset, tables)
221    
222     #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
223     namesptr, cbuffer, size) \
224     rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
225     (char *)namesptr, cbuffer, size)
226    
227     #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
228     rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
229    
230     #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
231     offsets, size_offsets, workspace, size_workspace) \
232     count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
233     offsets, size_offsets, workspace, size_workspace)
234    
235     #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
236     offsets, size_offsets) \
237     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
238     offsets, size_offsets)
239    
240     #define PCRE_FREE_STUDY8(extra) \
241     pcre_free_study(extra)
242    
243     #define PCRE_FREE_SUBSTRING8(substring) \
244     pcre_free_substring(substring)
245    
246     #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
247     pcre_free_substring_list(listptr)
248    
249     #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
250     getnamesptr, subsptr) \
251     rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
252     (char *)getnamesptr, subsptr)
253    
/* Look up the number of a named group in 8-bit mode and store it in n.
NOTE(review): the expansion names `re` directly rather than using the `rc`
parameter, so `rc` is ignored and the macro only works where a variable called
`re` is in scope at the point of use. Confirm against the call sites before
changing either the parameter name or the body. */
254     #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
255     n = pcre_get_stringnumber(re, (char *)ptr)
256    
257     #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
258     rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
259    
260     #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
261     rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
262    
263 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
264     rc = pcre_pattern_to_host_byte_order(re, extra, tables)
265 ph10 836
266     #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
267     pcre_printint(re, outfile, debug_lengths)
268    
269     #define PCRE_STUDY8(extra, re, options, error) \
270     extra = pcre_study(re, options, error)
271    
272 zherczeg 852 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
273     pcre_jit_stack_alloc(startsize, maxsize)
274    
275     #define PCRE_JIT_STACK_FREE8(stack) \
276     pcre_jit_stack_free(stack)
277    
278 ph10 836 #endif /* SUPPORT_PCRE8 */
279    
280     /* -----------------------------------------------------------*/
281    
282     #ifdef SUPPORT_PCRE16
283    
284     #define PCHARS16(lv, p, offset, len, f) \
285     lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
286    
287     #define PCHARSV16(p, offset, len, f) \
288     (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
289    
290     #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
291     p = read_capture_name16(p, cn16, re)
292    
293     #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
294    
295     #define SET_PCRE_CALLOUT16(callout) \
296 zherczeg 850 pcre16_callout = (int (*)(pcre16_callout_block *))callout
297 ph10 836
298 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
299     pcre16_assign_jit_stack((pcre16_extra *)extra, \
300     (pcre16_jit_callback)callback, userdata)
301 ph10 836
302     #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303 zherczeg 852 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
304     tables)
305 ph10 836
/* Copy a named or numbered captured substring in 16-bit mode, storing the
result of the library call in rc. The trailing size argument is halved before
being passed on -- presumably callers supply the buffer size in bytes while the
16-bit functions take a count of 16-bit units (confirm at the call sites). The
argument is parenthesized so that an expression such as a + b is halved as a
whole instead of expanding to a + b/2. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
    count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, (size)/2)

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_UCHAR16 *)cbuffer, (size)/2)
314 ph10 836
315     #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
316     offsets, size_offsets, workspace, size_workspace) \
317 zherczeg 852 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
318     (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
319     workspace, size_workspace)
320 ph10 836
321     #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
322     offsets, size_offsets) \
323 zherczeg 852 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
324     len, start_offset, options, offsets, size_offsets)
325 ph10 836
326     #define PCRE_FREE_STUDY16(extra) \
327 zherczeg 850 pcre16_free_study((pcre16_extra *)extra)
328 ph10 836
329     #define PCRE_FREE_SUBSTRING16(substring) \
330     pcre16_free_substring((PCRE_SPTR16)substring)
331    
332     #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
333     pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
334    
335     #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
336     getnamesptr, subsptr) \
337 zherczeg 852 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
338     count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
339 ph10 836
/* Look up the number of a named group in 16-bit mode and store it in n.
NOTE(review): as in the 8-bit variant, the expansion names `re` directly and
ignores the `rc` parameter, so a variable called `re` must be in scope at the
point of use. Unlike the other 16-bit wrappers in this section, `re` is passed
without a (pcre16 *) cast -- confirm whether that is intentional. */
340     #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
341     n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
342    
343     #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
344     rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
345     (PCRE_SPTR16 *)(void*)subsptr)
346    
347     #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
348     rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
349     (PCRE_SPTR16 **)(void*)listptr)
350    
351 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352 zherczeg 852 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
353     tables)
354 ph10 836
355     #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
356     pcre16_printint(re, outfile, debug_lengths)
357    
358     #define PCRE_STUDY16(extra, re, options, error) \
359 zherczeg 852 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
360 ph10 836
361 zherczeg 852 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
362     (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
363    
364     #define PCRE_JIT_STACK_FREE16(stack) \
365     pcre16_jit_stack_free((pcre16_jit_stack *)stack)
366    
367 ph10 836 #endif /* SUPPORT_PCRE16 */
368    
369    
370     /* ----- Both modes are supported; a runtime test is needed, except for
371     pcre_config(), where it doesn't matter which version is called. (The JIT
372     stack macros in this section do test use_pcre16.) ----- */
373    
374     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
375    
/* Generic (mode-independent) wrappers, used when both the 8-bit and the 16-bit
libraries are available. Each one tests use_pcre16 at run time and forwards its
arguments unchanged to the corresponding ...16 or ...8 macro.

The statement-like wrappers are enclosed in do { ... } while (0) so that each
one expands to exactly one statement. A bare if/else expansion cannot be used
as the body of an outer "if" that has its own "else", and would capture a
following "else" if the call were ever written without its semicolon. The
wrappers that yield a value (CHAR_SIZE, STRLEN, PCRE_JIT_STACK_ALLOC and
PCRE_MAKETABLES) remain conditional expressions, and PCRE_CONFIG is a plain
alias. */

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) { \
      PCHARS16(lv, p, offset, len, f); \
    } else { \
      PCHARS8(lv, p, offset, len, f); \
    } \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) { \
      PCHARSV16(p, offset, len, f); \
    } else { \
      PCHARSV8(p, offset, len, f); \
    } \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) { \
      READ_CAPTURE_NAME16(p, cn8, cn16, re); \
    } else { \
      READ_CAPTURE_NAME8(p, cn8, cn16, re); \
    } \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) { \
      SET_PCRE_CALLOUT16(callout); \
    } else { \
      SET_PCRE_CALLOUT8(callout); \
    } \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
  do { \
    if (use_pcre16) { \
      PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
    } else { \
      PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata); \
    } \
  } while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) { \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    } else { \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
    } \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) { \
      PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } else { \
      PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        namesptr, cbuffer, size); \
    } \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) { \
      PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
    } else { \
      PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); \
    } \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) { \
      PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } else { \
      PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets, workspace, size_workspace); \
    } \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) { \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } else { \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) { \
      PCRE_FREE_STUDY16(extra); \
    } else { \
      PCRE_FREE_STUDY8(extra); \
    } \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) { \
      PCRE_FREE_SUBSTRING16(substring); \
    } else { \
      PCRE_FREE_SUBSTRING8(substring); \
    } \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) { \
      PCRE_FREE_SUBSTRING_LIST16(listptr); \
    } else { \
      PCRE_FREE_SUBSTRING_LIST8(listptr); \
    } \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) { \
      PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } else { \
      PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
        getnamesptr, subsptr); \
    } \
  } while (0)

#define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
  do { \
    if (use_pcre16) { \
      PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
    } else { \
      PCRE_GET_STRINGNUMBER8(n, rc, ptr); \
    } \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) { \
      PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
    } else { \
      PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); \
    } \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) { \
      PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
    } else { \
      PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); \
    } \
  } while (0)

#define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
  (use_pcre16 ? \
     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
    :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))

#define PCRE_JIT_STACK_FREE(stack) \
  do { \
    if (use_pcre16) { \
      PCRE_JIT_STACK_FREE16(stack); \
    } else { \
      PCRE_JIT_STACK_FREE8(stack); \
    } \
  } while (0)

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
    } else { \
      PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); \
    } \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) { \
      PCRE_PRINTINT16(re, outfile, debug_lengths); \
    } else { \
      PCRE_PRINTINT8(re, outfile, debug_lengths); \
    } \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) { \
      PCRE_STUDY16(extra, re, options, error); \
    } else { \
      PCRE_STUDY8(extra, re, options, error); \
    } \
  } while (0)
527    
528     /* ----- Only 8-bit mode is supported ----- */
529    
530     #elif defined SUPPORT_PCRE8
531     #define CHAR_SIZE 1
532     #define PCHARS PCHARS8
533     #define PCHARSV PCHARSV8
534     #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
535     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
536     #define STRLEN STRLEN8
537 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
538 ph10 836 #define PCRE_COMPILE PCRE_COMPILE8
539     #define PCRE_CONFIG pcre_config
540     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
542     #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
543     #define PCRE_EXEC PCRE_EXEC8
544     #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
545     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
546     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
547     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
548     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
549     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
550     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
551 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
552     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
553 ph10 836 #define PCRE_MAKETABLES pcre_maketables()
554     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555     #define PCRE_PRINTINT PCRE_PRINTINT8
556     #define PCRE_STUDY PCRE_STUDY8
557    
558     /* ----- Only 16-bit mode is supported ----- */
559    
560     #else
561     #define CHAR_SIZE 2
562     #define PCHARS PCHARS16
563     #define PCHARSV PCHARSV16
564     #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
565     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
566     #define STRLEN STRLEN16
567 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
568 ph10 836 #define PCRE_COMPILE PCRE_COMPILE16
569     #define PCRE_CONFIG pcre16_config
570     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
572     #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
573     #define PCRE_EXEC PCRE_EXEC16
574     #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
575     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
576     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
577     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
578     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
579     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
580     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
581 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
582     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
583 ph10 836 #define PCRE_MAKETABLES pcre16_maketables()
584     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585     #define PCRE_PRINTINT PCRE_PRINTINT16
586     #define PCRE_STUDY PCRE_STUDY16
587     #endif
588    
589     /* ----- End of mode-specific function call macros ----- */
590    
591    
592 nigel 85 /* Other parameters */
593    
594 nigel 3 #ifndef CLOCKS_PER_SEC
595     #ifdef CLK_TCK
596     #define CLOCKS_PER_SEC CLK_TCK
597     #else
598     #define CLOCKS_PER_SEC 100
599     #endif
600     #endif
601    
602 nigel 93 /* This is the default loop count for timing. */
603    
604 nigel 75 #define LOOPREPEAT 500000
605 nigel 3
606 nigel 85 /* Static variables */
607    
608 nigel 3 static FILE *outfile;
609     static int log_store = 0;
610 nigel 63 static int callout_count;
611     static int callout_extra;
612     static int callout_fail_count;
613     static int callout_fail_id;
614 ph10 210 static int debug_lengths;
615 nigel 63 static int first_callout;
616 nigel 93 static int locale_set = 0;
617 nigel 73 static int show_malloc;
618 ph10 836 static int use_utf;
619 nigel 43 static size_t gotten_store;
620 ph10 836 static size_t first_gotten_store = 0;
621 ph10 645 static const unsigned char *last_callout_mark = NULL;
622 nigel 3
623 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
624    
625     static int buffer_size = 50000;
626 ph10 836 static pcre_uint8 *buffer = NULL;
627     static pcre_uint8 *dbuffer = NULL;
628     static pcre_uint8 *pbuffer = NULL;
629 nigel 3
630 ph10 836 /* Another buffer is needed for translation to 16-bit character strings. It will
631     be obtained and extended as required. */
632    
633     #ifdef SUPPORT_PCRE16
634     static int buffer16_size = 0;
635     static pcre_uint16 *buffer16 = NULL;
636    
637     #ifdef SUPPORT_PCRE8
638    
639     /* We need the table of operator lengths that is used for 16-bit compiling, in
640     order to swap bytes in a pattern for saving/reloading testing. Luckily, the
641     data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
642     appropriately for the 16-bit world. Just as a safety check, make sure that
643     COMPILE_PCRE16 is *not* set. */
644    
645     #ifdef COMPILE_PCRE16
646     #error COMPILE_PCRE16 must not be set when compiling pcretest.c
647     #endif
648    
649     #if LINK_SIZE == 2
650     #undef LINK_SIZE
651     #define LINK_SIZE 1
652     #elif LINK_SIZE == 3 || LINK_SIZE == 4
653     #undef LINK_SIZE
654     #define LINK_SIZE 2
655     #else
656     #error LINK_SIZE must be either 2, 3, or 4
657     #endif
658    
659 zherczeg 839 #undef IMM2_SIZE
660     #define IMM2_SIZE 1
661    
662 ph10 836 #endif /* SUPPORT_PCRE8 */
663    
664     static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
665     #endif /* SUPPORT_PCRE16 */
666    
667     /* If we have 8-bit support, default use_pcre16 to false; if there is also
668     16-bit support, it can be changed by an option. If there is no 8-bit support,
669     there must be 16-bit support, so default it to 1. */
670    
671     #ifdef SUPPORT_PCRE8
672     static int use_pcre16 = 0;
673     #else
674     static int use_pcre16 = 1;
675     #endif
676    
677 ph10 923 /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
678    
/* The seven entries cover every non-empty combination of the three
PCRE_STUDY_JIT_xxx options, in the order: COMPILE; PARTIAL_SOFT; COMPILE and
PARTIAL_SOFT; PARTIAL_HARD; COMPILE and PARTIAL_HARD; PARTIAL_SOFT and
PARTIAL_HARD; all three. Presumably the digit n selects entry n-1 -- confirm
at the point of use. */
679     static int jit_study_bits[] =
680     {
681     PCRE_STUDY_JIT_COMPILE,
682     PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
683     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
684     PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
685     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
686     PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
687     PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
688     PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
689     };
690    
691 ph10 598 /* Textual explanations for runtime error codes */
692 nigel 75
693 ph10 598 static const char *errtexts[] = {
694     NULL, /* 0 is no error */
695     NULL, /* NOMATCH is handled specially */
696     "NULL argument passed",
697     "bad option value",
698     "magic number missing",
699     "unknown opcode - pattern overwritten?",
700     "no more memory",
701 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
702 ph10 598 "match limit exceeded",
703     "callout error code",
704 ph10 836 NULL, /* BADUTF8/16 is handled specially */
705     NULL, /* BADUTF8/16 offset is handled specially */
706 ph10 598 NULL, /* PARTIAL is handled specially */
707     "not used - internal error",
708     "internal error - pattern overwritten?",
709     "bad count value",
710     "item unsupported for DFA matching",
711     "backreference condition or recursion test not supported for DFA matching",
712     "match limit not supported for DFA matching",
713     "workspace size exceeded in DFA matching",
714 ph10 654 "too much recursion for DFA matching",
715 ph10 598 "recursion limit exceeded",
716     "not used - internal error",
717     "invalid combination of newline options",
718     "bad offset value",
719 ph10 836 NULL, /* SHORTUTF8/16 is handled specially */
720 ph10 676 "nested recursion at the same subject position",
721 ph10 836 "JIT stack limit reached",
722     "pattern compiled in wrong mode: 8-bit/16-bit error"
723 ph10 598 };
724    
725 ph10 654
726 ph10 541 /*************************************************
727     * Alternate character tables *
728     *************************************************/
729 nigel 49
730 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
731     using the default tables of the library. However, the T option can be used to
732     select alternate sets of tables, for different kinds of testing. Note also that
733 ph10 541 the L (locale) option also adjusts the tables. */
734    
735 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
736 ph10 541 only ASCII characters. */
737    
/* Layout of the initializer below, in order:
     256 bytes  lower casing table
     256 bytes  case flipping table
     320 bytes  ten 32-byte character class bit maps
     256 bytes  character type flags
   giving 1088 bytes in total. */
738 ph10 836 static const pcre_uint8 tables0[] = {
739 ph10 541
740     /* This table is a lower casing table. */
741    
742     0, 1, 2, 3, 4, 5, 6, 7,
743     8, 9, 10, 11, 12, 13, 14, 15,
744     16, 17, 18, 19, 20, 21, 22, 23,
745     24, 25, 26, 27, 28, 29, 30, 31,
746     32, 33, 34, 35, 36, 37, 38, 39,
747     40, 41, 42, 43, 44, 45, 46, 47,
748     48, 49, 50, 51, 52, 53, 54, 55,
749     56, 57, 58, 59, 60, 61, 62, 63,
750     64, 97, 98, 99,100,101,102,103,
751     104,105,106,107,108,109,110,111,
752     112,113,114,115,116,117,118,119,
753     120,121,122, 91, 92, 93, 94, 95,
754     96, 97, 98, 99,100,101,102,103,
755     104,105,106,107,108,109,110,111,
756     112,113,114,115,116,117,118,119,
757     120,121,122,123,124,125,126,127,
758     128,129,130,131,132,133,134,135,
759     136,137,138,139,140,141,142,143,
760     144,145,146,147,148,149,150,151,
761     152,153,154,155,156,157,158,159,
762     160,161,162,163,164,165,166,167,
763     168,169,170,171,172,173,174,175,
764     176,177,178,179,180,181,182,183,
765     184,185,186,187,188,189,190,191,
766     192,193,194,195,196,197,198,199,
767     200,201,202,203,204,205,206,207,
768     208,209,210,211,212,213,214,215,
769     216,217,218,219,220,221,222,223,
770     224,225,226,227,228,229,230,231,
771     232,233,234,235,236,237,238,239,
772     240,241,242,243,244,245,246,247,
773     248,249,250,251,252,253,254,255,
774    
775     /* This table is a case flipping table. */
776    
777     0, 1, 2, 3, 4, 5, 6, 7,
778     8, 9, 10, 11, 12, 13, 14, 15,
779     16, 17, 18, 19, 20, 21, 22, 23,
780     24, 25, 26, 27, 28, 29, 30, 31,
781     32, 33, 34, 35, 36, 37, 38, 39,
782     40, 41, 42, 43, 44, 45, 46, 47,
783     48, 49, 50, 51, 52, 53, 54, 55,
784     56, 57, 58, 59, 60, 61, 62, 63,
785     64, 97, 98, 99,100,101,102,103,
786     104,105,106,107,108,109,110,111,
787     112,113,114,115,116,117,118,119,
788     120,121,122, 91, 92, 93, 94, 95,
789     96, 65, 66, 67, 68, 69, 70, 71,
790     72, 73, 74, 75, 76, 77, 78, 79,
791     80, 81, 82, 83, 84, 85, 86, 87,
792     88, 89, 90,123,124,125,126,127,
793     128,129,130,131,132,133,134,135,
794     136,137,138,139,140,141,142,143,
795     144,145,146,147,148,149,150,151,
796     152,153,154,155,156,157,158,159,
797     160,161,162,163,164,165,166,167,
798     168,169,170,171,172,173,174,175,
799     176,177,178,179,180,181,182,183,
800     184,185,186,187,188,189,190,191,
801     192,193,194,195,196,197,198,199,
802     200,201,202,203,204,205,206,207,
803     208,209,210,211,212,213,214,215,
804     216,217,218,219,220,221,222,223,
805     224,225,226,227,228,229,230,231,
806     232,233,234,235,236,237,238,239,
807     240,241,242,243,244,245,246,247,
808     248,249,250,251,252,253,254,255,
809    
810     /* This table contains bit maps for various character classes. Each map is 32
811     bytes long and the bits run from the least significant end of each byte. The
812     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
813     graph, print, punct, and cntrl. Other classes are built from combinations. */
814    
/* space: characters 9-13 and 32 */
815     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
816     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
817     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819    
/* xdigit: 0-9, A-F, a-f */
820     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
821     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
822     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824    
/* digit: 0-9 */
825     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
826     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
827     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
828     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829    
/* upper: A-Z */
830     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
832     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
833     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834    
/* lower: a-z */
835     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
837     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
838     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839    
/* word: 0-9, A-Z, underscore, a-z */
840     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
841     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
842     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
843     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844    
/* graph: characters 33-126 */
845     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
846     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
847     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
848     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849    
/* print: characters 32-126 */
850     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
851     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
852     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
853     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854    
/* punct */
855     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
856     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
857     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
858     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859    
/* cntrl: characters 0-31 and 127 */
860     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
861     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
862     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
863     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864    
865     /* This table identifies various classes of character by individual bits:
866     0x01 white space character
867     0x02 letter
868     0x04 decimal digit
869     0x08 hexadecimal digit
870     0x10 alphanumeric or '_'
871     0x80 regular expression metacharacter or binary zero
872     */
873    
874     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
875     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
876     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
877     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
878     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
879     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
880     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
881     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
882     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
883     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
884     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
885     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
886     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
887     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
888     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
889     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
890     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
891     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
892     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
893     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
894     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
895     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
896     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
897     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
898     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
899     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
900     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
901     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
902     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
903     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
904     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
905     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
906    
907 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
908     be at least an approximation of ISO 8859. In particular, there are characters
909 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
910    
/* The initializer has the same 256 + 256 + 320 + 256 byte layout as tables0;
the section markers below were added by matching the row counts and bit
patterns against that table. */
911 ph10 836 static const pcre_uint8 tables1[] = {
/* Lower casing table (256 bytes). */
912 ph10 541 0,1,2,3,4,5,6,7,
913     8,9,10,11,12,13,14,15,
914     16,17,18,19,20,21,22,23,
915     24,25,26,27,28,29,30,31,
916     32,33,34,35,36,37,38,39,
917     40,41,42,43,44,45,46,47,
918     48,49,50,51,52,53,54,55,
919     56,57,58,59,60,61,62,63,
920     64,97,98,99,100,101,102,103,
921     104,105,106,107,108,109,110,111,
922     112,113,114,115,116,117,118,119,
923     120,121,122,91,92,93,94,95,
924     96,97,98,99,100,101,102,103,
925     104,105,106,107,108,109,110,111,
926     112,113,114,115,116,117,118,119,
927     120,121,122,123,124,125,126,127,
928     128,129,130,131,132,133,134,135,
929     136,137,138,139,140,141,142,143,
930     144,145,146,147,148,149,150,151,
931     152,153,154,155,156,157,158,159,
932     160,161,162,163,164,165,166,167,
933     168,169,170,171,172,173,174,175,
934     176,177,178,179,180,181,182,183,
935     184,185,186,187,188,189,190,191,
936     224,225,226,227,228,229,230,231,
937     232,233,234,235,236,237,238,239,
938     240,241,242,243,244,245,246,215,
939     248,249,250,251,252,253,254,223,
940     224,225,226,227,228,229,230,231,
941     232,233,234,235,236,237,238,239,
942     240,241,242,243,244,245,246,247,
943     248,249,250,251,252,253,254,255,
/* Case flipping table (256 bytes). */
944     0,1,2,3,4,5,6,7,
945     8,9,10,11,12,13,14,15,
946     16,17,18,19,20,21,22,23,
947     24,25,26,27,28,29,30,31,
948     32,33,34,35,36,37,38,39,
949     40,41,42,43,44,45,46,47,
950     48,49,50,51,52,53,54,55,
951     56,57,58,59,60,61,62,63,
952     64,97,98,99,100,101,102,103,
953     104,105,106,107,108,109,110,111,
954     112,113,114,115,116,117,118,119,
955     120,121,122,91,92,93,94,95,
956     96,65,66,67,68,69,70,71,
957     72,73,74,75,76,77,78,79,
958     80,81,82,83,84,85,86,87,
959     88,89,90,123,124,125,126,127,
960     128,129,130,131,132,133,134,135,
961     136,137,138,139,140,141,142,143,
962     144,145,146,147,148,149,150,151,
963     152,153,154,155,156,157,158,159,
964     160,161,162,163,164,165,166,167,
965     168,169,170,171,172,173,174,175,
966     176,177,178,179,180,181,182,183,
967     184,185,186,187,188,189,190,191,
968     224,225,226,227,228,229,230,231,
969     232,233,234,235,236,237,238,239,
970     240,241,242,243,244,245,246,215,
971     248,249,250,251,252,253,254,223,
972     192,193,194,195,196,197,198,199,
973     200,201,202,203,204,205,206,207,
974     208,209,210,211,212,213,214,247,
975     216,217,218,219,220,221,222,255,
/* Character class bit maps, ten maps of 32 bytes each (four rows per map).
NOTE(review): the class order is assumed to be the one documented in tables0
(space, xdigit, digit, upper, lower, word, graph, print, punct, cntrl); the
ASCII half of each map agrees with that order. */
/* space */
976     0,62,0,0,1,0,0,0,
977     0,0,0,0,0,0,0,0,
978     32,0,0,0,1,0,0,0,
979     0,0,0,0,0,0,0,0,
/* xdigit */
980     0,0,0,0,0,0,255,3,
981     126,0,0,0,126,0,0,0,
982     0,0,0,0,0,0,0,0,
983     0,0,0,0,0,0,0,0,
/* digit */
984     0,0,0,0,0,0,255,3,
985     0,0,0,0,0,0,0,0,
986     0,0,0,0,0,0,12,2,
987     0,0,0,0,0,0,0,0,
/* upper */
988     0,0,0,0,0,0,0,0,
989     254,255,255,7,0,0,0,0,
990     0,0,0,0,0,0,0,0,
991     255,255,127,127,0,0,0,0,
/* lower */
992     0,0,0,0,0,0,0,0,
993     0,0,0,0,254,255,255,7,
994     0,0,0,0,0,4,32,4,
995     0,0,0,128,255,255,127,255,
/* word */
996     0,0,0,0,0,0,255,3,
997     254,255,255,135,254,255,255,7,
998     0,0,0,0,0,4,44,6,
999     255,255,127,255,255,255,127,255,
/* graph */
1000     0,0,0,0,254,255,255,255,
1001     255,255,255,255,255,255,255,127,
1002     0,0,0,0,254,255,255,255,
1003     255,255,255,255,255,255,255,255,
/* print */
1004     0,2,0,0,255,255,255,255,
1005     255,255,255,255,255,255,255,127,
1006     0,0,0,0,255,255,255,255,
1007     255,255,255,255,255,255,255,255,
/* punct */
1008     0,0,0,0,254,255,0,252,
1009     1,0,0,248,1,0,0,120,
1010     0,0,0,0,254,255,255,255,
1011     0,0,128,0,0,0,128,0,
/* cntrl */
1012     255,255,255,255,0,0,0,0,
1013     0,0,0,0,0,0,0,128,
1014     255,255,255,255,0,0,0,0,
1015     0,0,0,0,0,0,0,0,
/* Character type flags (256 bytes), one byte per character; the individual
bit meanings are listed in the comment inside tables0. */
1016     128,0,0,0,0,0,0,0,
1017     0,1,1,0,1,1,0,0,
1018     0,0,0,0,0,0,0,0,
1019     0,0,0,0,0,0,0,0,
1020     1,0,0,0,128,0,0,0,
1021     128,128,128,128,0,0,128,0,
1022     28,28,28,28,28,28,28,28,
1023     28,28,0,0,0,0,0,128,
1024     0,26,26,26,26,26,26,18,
1025     18,18,18,18,18,18,18,18,
1026     18,18,18,18,18,18,18,18,
1027     18,18,18,128,128,0,128,16,
1028     0,26,26,26,26,26,26,18,
1029     18,18,18,18,18,18,18,18,
1030     18,18,18,18,18,18,18,18,
1031     18,18,18,128,128,0,0,0,
1032     0,0,0,0,0,1,0,0,
1033     0,0,0,0,0,0,0,0,
1034     0,0,0,0,0,0,0,0,
1035     0,0,0,0,0,0,0,0,
1036     1,0,0,0,0,0,0,0,
1037     0,0,18,0,0,0,0,0,
1038     0,0,20,20,0,18,0,0,
1039     0,20,18,0,0,0,0,0,
1040     18,18,18,18,18,18,18,18,
1041     18,18,18,18,18,18,18,18,
1042     18,18,18,18,18,18,18,0,
1043     18,18,18,18,18,18,18,18,
1044     18,18,18,18,18,18,18,18,
1045     18,18,18,18,18,18,18,18,
1046     18,18,18,18,18,18,18,0,
1047     18,18,18,18,18,18,18,18
1048     };
1049    
1050    
1051    
1052 ph10 558
1053     #ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Replacement for strerror() on old systems (SunOS4 is the usual example)
whose C library lacks it but does export sys_nerr and sys_errlist[].

Argument:   n    an error number
Returns:    sys_errlist[n] when n is in the range 0 .. sys_nerr-1,
            otherwise a fixed "unknown error number" string
*/

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr)
  return sys_errlist[n];
return "unknown error number";
}
1071     #endif /* HAVE_STRERROR */
1072    
1073    
1074 ph10 667 /*************************************************
1075     * JIT memory callback *
1076     *************************************************/
1077 ph10 558
1078 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
1079     {
1080     return (pcre_jit_stack *)arg;
1081     }
1082 ph10 558
1083 ph10 667
1084 ph10 836 #if !defined NOUTF || defined SUPPORT_PCRE16
1085 ph10 558 /*************************************************
1086 ph10 836 * Convert UTF-8 string to value *
1087     *************************************************/
1088    
1089     /* This function takes one or more bytes that represents a UTF-8 character,
1090     and returns the value of the character.
1091    
1092     Argument:
1093     utf8bytes a pointer to the byte vector
1094     vptr a pointer to an int to receive the value
1095    
1096     Returns: > 0 => the number of bytes consumed
1097     -6 to 0 => malformed UTF-8 character at offset = (-return)
1098     */
1099    
1100     static int
1101     utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1102     {
1103     int c = *utf8bytes++;
1104     int d = c;
1105     int i, j, s;
1106    
1107     for (i = -1; i < 6; i++) /* i is number of additional bytes */
1108     {
1109     if ((d & 0x80) == 0) break;
1110     d <<= 1;
1111     }
1112    
1113     if (i == -1) { *vptr = c; return 1; } /* ascii character */
1114     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1115    
1116     /* i now has a value in the range 1-5 */
1117    
1118     s = 6*i;
1119     d = (c & utf8_table3[i]) << s;
1120    
1121     for (j = 0; j < i; j++)
1122     {
1123     c = *utf8bytes++;
1124     if ((c & 0xc0) != 0x80) return -(j+1);
1125     s -= 6;
1126     d |= (c & 0x3f) << s;
1127     }
1128    
1129     /* Check that encoding was the correct unique one */
1130    
1131     for (j = 0; j < utf8_table1_size; j++)
1132     if (d <= utf8_table1[j]) break;
1133     if (j != i) return -(i+1);
1134    
1135     /* Valid value */
1136    
1137     *vptr = d;
1138     return i+1;
1139     }
1140     #endif /* NOUTF || SUPPORT_PCRE16 */
1141    
1142    
1143    
1144     #if !defined NOUTF || defined SUPPORT_PCRE16
1145     /*************************************************
1146     * Convert character value to UTF-8 *
1147     *************************************************/
1148    
1149     /* This function takes an integer value in the range 0 - 0x7fffffff
1150     and encodes it as a UTF-8 character in 0 to 6 bytes.
1151    
1152     Arguments:
1153     cvalue the character value
1154     utf8bytes pointer to buffer for result - at least 6 bytes long
1155    
1156     Returns: number of characters placed in the buffer
1157     */
1158    
1159     static int
1160     ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1161     {
1162     register int i, j;
1163     for (i = 0; i < utf8_table1_size; i++)
1164     if (cvalue <= utf8_table1[i]) break;
1165     utf8bytes += i;
1166     for (j = i; j > 0; j--)
1167     {
1168     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1169     cvalue >>= 6;
1170     }
1171     *utf8bytes = utf8_table2[i] | cvalue;
1172     return i + 1;
1173     }
1174 ph10 842 #endif
1175 ph10 836
1176    
1177     #ifdef SUPPORT_PCRE16
1178     /*************************************************
1179     * Convert a string to 16-bit *
1180     *************************************************/
1181    
1182     /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1183     8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1184     double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1185     in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1186     result is always left in buffer16.
1187    
1188     Note that this function does not object to surrogate values. This is
1189     deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1190     for the purpose of testing that they are correctly faulted.
1191    
1192 ph10 842 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1193 ph10 836 in UTF-8 so that values greater than 255 can be handled.
1194    
1195     Arguments:
1196     data TRUE if converting a data line; FALSE for a regex
1197     p points to a byte string
1198     utf true if UTF-8 (to be converted to UTF-16)
1199     len number of bytes in the string (excluding trailing zero)
1200    
1201     Returns: number of 16-bit data items used (excluding trailing zero)
1202     OR -1 if a UTF-8 string is malformed
1203     OR -2 if a value > 0x10ffff is encountered
1204 ph10 842 OR -3 if a value > 0xffff is encountered when not in UTF mode
1205 ph10 836 */
1206    
1207     static int
1208     to16(int data, pcre_uint8 *p, int utf, int len)
1209     {
1210     pcre_uint16 *pp;
1211    
1212     if (buffer16_size < 2*len + 2)
1213     {
1214     if (buffer16 != NULL) free(buffer16);
1215     buffer16_size = 2*len + 2;
1216     buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1217     if (buffer16 == NULL)
1218     {
1219     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1220     exit(1);
1221     }
1222     }
1223    
1224     pp = buffer16;
1225    
1226     if (!utf && !data)
1227     {
1228     while (len-- > 0) *pp++ = *p++;
1229     }
1230    
1231     else
1232     {
1233     int c = 0;
1234     while (len > 0)
1235     {
1236     int chlen = utf82ord(p, &c);
1237     if (chlen <= 0) return -1;
1238     if (c > 0x10ffff) return -2;
1239     p += chlen;
1240     len -= chlen;
1241     if (c < 0x10000) *pp++ = c; else
1242     {
1243     if (!utf) return -3;
1244     c -= 0x10000;
1245     *pp++ = 0xD800 | (c >> 10);
1246     *pp++ = 0xDC00 | (c & 0x3ff);
1247     }
1248     }
1249     }
1250    
1251     *pp = 0;
1252     return pp - buffer16;
1253     }
1254     #endif
1255    
1256    
1257     /*************************************************
1258 nigel 91 * Read or extend an input line *
1259     *************************************************/
1260    
1261     /* Input lines are read into buffer, but both patterns and data lines can be
1262     continued over multiple input lines. In addition, if the buffer fills up, we
1263     want to automatically expand it so as to be able to handle extremely large
1264     lines that are needed for certain stress tests. When the input buffer is
1265     expanded, the other two buffers must also be expanded likewise, and the
1266     contents of pbuffer, which are a copy of the input for callouts, must be
1267     preserved (for when expansion happens for a data line). This is not the most
1268     optimal way of handling this, but hey, this is just a test program!
1269    
1270     Arguments:
1271     f the file to read
1272     start where in buffer to start (this *must* be within buffer)
1273 ph10 287 prompt for stdin or readline()
1274 nigel 91
1275     Returns: pointer to the start of new data
1276     could be a copy of start, or could be moved
1277     NULL if no data read and EOF reached
1278     */
1279    
1280 ph10 836 static pcre_uint8 *
1281     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1282 nigel 91 {
1283 ph10 836 pcre_uint8 *here = start;
1284 nigel 91
1285     for (;;)
1286     {
1287 ph10 904 size_t rlen = (size_t)(buffer_size - (here - buffer));
1288 nigel 93
1289 nigel 91 if (rlen > 1000)
1290     {
1291     int dlen;
1292 ph10 289
1293 ph10 287 /* If libreadline support is required, use readline() to read a line if the
1294     input is a terminal. Note that readline() removes the trailing newline, so
1295     we must put it back again, to be compatible with fgets(). */
1296 ph10 289
1297 ph10 287 #ifdef SUPPORT_LIBREADLINE
1298     if (isatty(fileno(f)))
1299     {
1300 ph10 289 size_t len;
1301 ph10 287 char *s = readline(prompt);
1302     if (s == NULL) return (here == start)? NULL : start;
1303     len = strlen(s);
1304 ph10 289 if (len > 0) add_history(s);
1305 ph10 287 if (len > rlen - 1) len = rlen - 1;
1306     memcpy(here, s, len);
1307     here[len] = '\n';
1308 ph10 289 here[len+1] = 0;
1309     free(s);
1310 ph10 287 }
1311 ph10 289 else
1312     #endif
1313    
1314 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1315 ph10 289
1316 ph10 287 {
1317 ph10 516 if (f == stdin) printf("%s", prompt);
1318 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1319     return (here == start)? NULL : start;
1320 ph10 289 }
1321    
1322 nigel 91 dlen = (int)strlen((char *)here);
1323     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1324     here += dlen;
1325     }
1326    
1327     else
1328     {
1329     int new_buffer_size = 2*buffer_size;
1330 ph10 836 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1331     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1332     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1333 nigel 91
1334     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1335     {
1336     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1337     exit(1);
1338     }
1339    
1340     memcpy(new_buffer, buffer, buffer_size);
1341     memcpy(new_pbuffer, pbuffer, buffer_size);
1342    
1343     buffer_size = new_buffer_size;
1344    
1345     start = new_buffer + (start - buffer);
1346     here = new_buffer + (here - buffer);
1347    
1348     free(buffer);
1349     free(dbuffer);
1350     free(pbuffer);
1351    
1352     buffer = new_buffer;
1353     dbuffer = new_dbuffer;
1354     pbuffer = new_pbuffer;
1355     }
1356     }
1357    
1358     return NULL; /* Control never gets here */
1359     }
1360    
1361    
1362    
1363     /*************************************************
1364 nigel 63 * Read number from string *
1365     *************************************************/
1366    
1367     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1368     around with conditional compilation, just do the job by hand. It is only used
1369 nigel 93 for unpicking arguments, so just keep it simple.
1370 nigel 63
1371     Arguments:
1372     str string to be converted
1373     endptr where to put the end pointer
1374    
1375     Returns: the unsigned long
1376     */
1377    
1378     static int
1379 ph10 836 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1380 nigel 63 {
1381     int result = 0;
1382     while(*str != 0 && isspace(*str)) str++;
1383     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1384     *endptr = str;
1385     return(result);
1386     }
1387    
1388    
1389    
1390 nigel 49 /*************************************************
1391 ph10 836 * Print one character *
1392 nigel 49 *************************************************/
1393    
1394 ph10 836 /* Print a single character either literally, or as a hex escape. */
1395 nigel 49
1396 ph10 836 static int pchar(int c, FILE *f)
1397 nigel 49 {
1398 ph10 836 if (PRINTOK(c))
1399     {
1400     if (f != NULL) fprintf(f, "%c", c);
1401     return 1;
1402     }
1403 nigel 49
1404 ph10 836 if (c < 0x100)
1405 nigel 49 {
1406 ph10 836 if (use_utf)
1407     {
1408     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1409     return 6;
1410     }
1411     else
1412     {
1413     if (f != NULL) fprintf(f, "\\x%02x", c);
1414     return 4;
1415     }
1416 nigel 49 }
1417    
1418 ph10 836 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1419     return (c <= 0x000000ff)? 6 :
1420     (c <= 0x00000fff)? 7 :
1421     (c <= 0x0000ffff)? 8 :
1422     (c <= 0x000fffff)? 9 : 10;
1423     }
1424 nigel 49
1425    
1426    
1427 ph10 836 #ifdef SUPPORT_PCRE8
1428     /*************************************************
1429     * Print 8-bit character string *
1430     *************************************************/
1431 nigel 49
1432 ph10 836 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1433     If handed a NULL file, just counts chars without printing. */
1434 nigel 49
1435 ph10 836 static int pchars(pcre_uint8 *p, int length, FILE *f)
1436     {
1437     int c = 0;
1438     int yield = 0;
1439 nigel 49
1440 ph10 836 if (length < 0)
1441     length = strlen((char *)p);
1442 nigel 49
1443 ph10 836 while (length-- > 0)
1444     {
1445     #if !defined NOUTF
1446     if (use_utf)
1447     {
1448     int rc = utf82ord(p, &c);
1449     if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1450     {
1451     length -= rc - 1;
1452     p += rc;
1453     yield += pchar(c, f);
1454     continue;
1455     }
1456     }
1457     #endif
1458     c = *p++;
1459     yield += pchar(c, f);
1460     }
1461    
1462     return yield;
1463 nigel 49 }
1464 nigel 79 #endif
1465 nigel 49
1466    
1467 nigel 79
1468 ph10 836 #ifdef SUPPORT_PCRE16
1469 nigel 63 /*************************************************
1470 ph10 836 * Find length of 0-terminated 16-bit string *
1471 nigel 85 *************************************************/
1472    
1473 ph10 836 static int strlen16(PCRE_SPTR16 p)
1474 nigel 85 {
1475 ph10 836 int len = 0;
1476     while (*p++ != 0) len++;
1477     return len;
1478 nigel 85 }
1479 ph10 836 #endif /* SUPPORT_PCRE16 */
1480 nigel 85
1481    
1482 ph10 836 #ifdef SUPPORT_PCRE16
1483 nigel 85 /*************************************************
1484 ph10 836 * Print 16-bit character string *
1485 nigel 63 *************************************************/
1486 nigel 49
1487 ph10 836 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1488     If handed a NULL file, just counts chars without printing. */
1489 nigel 49
1490 ph10 836 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1491 nigel 3 {
1492 nigel 63 int yield = 0;
1493 nigel 3
1494 ph10 836 if (length < 0)
1495     length = strlen16(p);
1496    
1497 nigel 63 while (length-- > 0)
1498 nigel 3 {
1499 ph10 836 int c = *p++ & 0xffff;
1500     #if !defined NOUTF
1501     if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1502 nigel 63 {
1503 ph10 836 int d = *p & 0xffff;
1504     if (d >= 0xDC00 && d < 0xDFFF)
1505 nigel 63 {
1506 ph10 836 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1507     length--;
1508     p++;
1509 nigel 63 }
1510     }
1511 nigel 79 #endif
1512 ph10 836 yield += pchar(c, f);
1513     }
1514 nigel 3
1515 ph10 836 return yield;
1516     }
1517     #endif /* SUPPORT_PCRE16 */
1518 nigel 63
1519 ph10 836
1520    
1521     #ifdef SUPPORT_PCRE8
1522     /*************************************************
1523     * Read a capture name (8-bit) and check it *
1524     *************************************************/
1525    
1526     static pcre_uint8 *
1527     read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1528     {
1529     pcre_uint8 *npp = *pp;
1530     while (isalnum(*p)) *npp++ = *p++;
1531     *npp++ = 0;
1532     *npp = 0;
1533     if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1534     {
1535     fprintf(outfile, "no parentheses with name \"");
1536     PCHARSV(*pp, 0, -1, outfile);
1537     fprintf(outfile, "\"\n");
1538 nigel 63 }
1539 nigel 3
1540 ph10 836 *pp = npp;
1541     return p;
1542 nigel 63 }
1543 ph10 836 #endif /* SUPPORT_PCRE8 */
1544 nigel 23
1545 nigel 3
1546 nigel 23
1547 ph10 836 #ifdef SUPPORT_PCRE16
1548 nigel 63 /*************************************************
1549 ph10 836 * Read a capture name (16-bit) and check it *
1550     *************************************************/
1551    
1552     /* Note that the text being read is 8-bit. */
1553    
1554     static pcre_uint8 *
1555     read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1556     {
1557     pcre_uint16 *npp = *pp;
1558     while (isalnum(*p)) *npp++ = *p++;
1559     *npp++ = 0;
1560     *npp = 0;
1561 zherczeg 852 if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1562 ph10 836 {
1563     fprintf(outfile, "no parentheses with name \"");
1564     PCHARSV(*pp, 0, -1, outfile);
1565     fprintf(outfile, "\"\n");
1566     }
1567     *pp = npp;
1568     return p;
1569     }
1570     #endif /* SUPPORT_PCRE16 */
1571    
1572    
1573    
1574     /*************************************************
1575 nigel 63 * Callout function *
1576     *************************************************/
1577 nigel 3
1578 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1579     the match. Yield zero unless more callouts than the fail count, or the callout
1580     data is not zero. */
1581 nigel 3
1582 nigel 63 static int callout(pcre_callout_block *cb)
1583     {
1584     FILE *f = (first_callout | callout_extra)? outfile : NULL;
1585 nigel 75 int i, pre_start, post_start, subject_length;
1586 nigel 3
1587 nigel 63 if (callout_extra)
1588     {
1589     fprintf(f, "Callout %d: last capture = %d\n",
1590     cb->callout_number, cb->capture_last);
1591 nigel 3
1592 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
1593     {
1594     if (cb->offset_vector[i] < 0)
1595     fprintf(f, "%2d: <unset>\n", i/2);
1596     else
1597     {
1598     fprintf(f, "%2d: ", i/2);
1599 ph10 836 PCHARSV(cb->subject, cb->offset_vector[i],
1600 nigel 63 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1601     fprintf(f, "\n");
1602     }
1603     }
1604     }
1605 nigel 3
1606 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
1607     datails. On subsequent calls in the same match, we use pchars just to find the
1608     printed lengths of the substrings. */
1609 nigel 3
1610 nigel 63 if (f != NULL) fprintf(f, "--->");
1611 nigel 3
1612 ph10 836 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1613     PCHARS(post_start, cb->subject, cb->start_match,
1614 nigel 63 cb->current_position - cb->start_match, f);
1615 nigel 3
1616 ph10 836 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1617 nigel 75
1618 ph10 836 PCHARSV(cb->subject, cb->current_position,
1619 nigel 63 cb->subject_length - cb->current_position, f);
1620 nigel 3
1621 nigel 63 if (f != NULL) fprintf(f, "\n");
1622 nigel 9
1623 nigel 63 /* Always print appropriate indicators, with callout number if not already
1624 nigel 75 shown. For automatic callouts, show the pattern offset. */
1625 nigel 3
1626 nigel 75 if (cb->callout_number == 255)
1627     {
1628     fprintf(outfile, "%+3d ", cb->pattern_position);
1629     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1630     }
1631     else
1632     {
1633     if (callout_extra) fprintf(outfile, " ");
1634     else fprintf(outfile, "%3d ", cb->callout_number);
1635     }
1636 nigel 3
1637 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1638     fprintf(outfile, "^");
1639 nigel 3
1640 nigel 63 if (post_start > 0)
1641     {
1642     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1643     fprintf(outfile, "^");
1644 nigel 3 }
1645    
1646 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1647     fprintf(outfile, " ");
1648    
1649     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1650     pbuffer + cb->pattern_position);
1651    
1652 nigel 63 fprintf(outfile, "\n");
1653     first_callout = 0;
1654 nigel 3
1655 ph10 654 if (cb->mark != last_callout_mark)
1656 ph10 645 {
1657 ph10 836 if (cb->mark == NULL)
1658     fprintf(outfile, "Latest Mark: <unset>\n");
1659     else
1660     {
1661     fprintf(outfile, "Latest Mark: ");
1662     PCHARSV(cb->mark, 0, -1, outfile);
1663     putc('\n', outfile);
1664     }
1665 ph10 654 last_callout_mark = cb->mark;
1666     }
1667 ph10 645
1668 nigel 71 if (cb->callout_data != NULL)
1669 nigel 49 {
1670 nigel 71 int callout_data = *((int *)(cb->callout_data));
1671     if (callout_data != 0)
1672     {
1673     fprintf(outfile, "Callout data = %d\n", callout_data);
1674     return callout_data;
1675     }
1676 nigel 63 }
1677 nigel 49
1678 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
1679     (++callout_count >= callout_fail_count)? 1 : 0;
1680 nigel 3 }
1681    
1682    
1683 nigel 63 /*************************************************
1684 nigel 73 * Local malloc functions *
1685 nigel 63 *************************************************/
1686 nigel 3
1687 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1688 ph10 836 compiled re, which is the first store request that pcre_compile() makes. The
1689     show_malloc variable is set only during matching. */
1690 nigel 3
1691     static void *new_malloc(size_t size)
1692     {
1693 nigel 73 void *block = malloc(size);
1694 nigel 43 gotten_store = size;
1695 ph10 836 if (first_gotten_store == 0) first_gotten_store = size;
1696 nigel 73 if (show_malloc)
1697 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1698 nigel 73 return block;
1699 nigel 3 }
1700    
1701 nigel 73 static void new_free(void *block)
1702     {
1703     if (show_malloc)
1704     fprintf(outfile, "free %p\n", block);
1705     free(block);
1706     }
1707 nigel 3
1708 nigel 73 /* For recursion malloc/free, to test stacking calls */
1709    
1710     static void *stack_malloc(size_t size)
1711     {
1712     void *block = malloc(size);
1713     if (show_malloc)
1714 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1715 nigel 73 return block;
1716     }
1717    
1718     static void stack_free(void *block)
1719     {
1720     if (show_malloc)
1721     fprintf(outfile, "stack_free %p\n", block);
1722     free(block);
1723     }
1724    
1725    
1726 nigel 63 /*************************************************
1727     * Call pcre_fullinfo() *
1728     *************************************************/
1729 nigel 43
1730 ph10 836 /* Get one piece of information from the pcre_fullinfo() function. When only
1731     one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1732     value, but the code is defensive.
1733 nigel 43
1734 ph10 836 Arguments:
1735     re compiled regex
1736     study study data
1737     option PCRE_INFO_xxx option
1738     ptr where to put the data
1739    
1740     Returns: 0 when OK, < 0 on error
1741     */
1742    
1743     static int
1744     new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1745 nigel 43 {
1746     int rc;
1747 ph10 836
1748     if (use_pcre16)
1749     #ifdef SUPPORT_PCRE16
1750 zherczeg 852 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1751 ph10 836 #else
1752     rc = PCRE_ERROR_BADMODE;
1753     #endif
1754     else
1755     #ifdef SUPPORT_PCRE8
1756     rc = pcre_fullinfo(re, study, option, ptr);
1757     #else
1758     rc = PCRE_ERROR_BADMODE;
1759     #endif
1760    
1761     if (rc < 0)
1762     {
1763     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1764     use_pcre16? "16" : "", option);
1765     if (rc == PCRE_ERROR_BADMODE)
1766     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1767     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1768     }
1769    
1770     return rc;
1771 nigel 43 }
1772    
1773    
1774    
1775 nigel 63 /*************************************************
1776 ph10 836 * Swap byte functions *
1777 nigel 75 *************************************************/
1778    
1779 ph10 836 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1780     value, respectively.
1781    
1782     Arguments:
1783     value any number
1784    
1785     Returns: the byte swapped value
1786     */
1787    
1788     static pcre_uint32
1789     swap_uint32(pcre_uint32 value)
1790 nigel 75 {
1791     return ((value & 0x000000ff) << 24) |
1792     ((value & 0x0000ff00) << 8) |
1793     ((value & 0x00ff0000) >> 8) |
1794 ph10 836 (value >> 24);
1795 nigel 75 }
1796    
1797 ph10 836 static pcre_uint16
1798     swap_uint16(pcre_uint16 value)
1799     {
1800     return (value >> 8) | (value << 8);
1801     }
1802 nigel 75
1803    
1804    
1805     /*************************************************
1806 ph10 836 * Flip bytes in a compiled pattern *
1807     *************************************************/
1808    
1809     /* This function is called if the 'F' option was present on a pattern that is
1810     to be written to a file. We flip the bytes of all the integer fields in the
1811     regex data block and the study block. In 16-bit mode this also flips relevant
1812     bytes in the pattern itself. This is to make it possible to test PCRE's
1813     ability to reload byte-flipped patterns, e.g. those compiled on a different
1814     architecture. */
1815    
1816     static void
1817     regexflip(pcre *ere, pcre_extra *extra)
1818     {
1819 zherczeg 852 REAL_PCRE *re = (REAL_PCRE *)ere;
1820 ph10 836 #ifdef SUPPORT_PCRE16
1821     int op;
1822     pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1823     int length = re->name_count * re->name_entry_size;
1824     #ifdef SUPPORT_UTF
1825     BOOL utf = (re->options & PCRE_UTF16) != 0;
1826     BOOL utf16_char = FALSE;
1827     #endif /* SUPPORT_UTF */
1828     #endif /* SUPPORT_PCRE16 */
1829    
1830     /* Always flip the bytes in the main data block and study blocks. */
1831    
1832     re->magic_number = REVERSED_MAGIC_NUMBER;
1833     re->size = swap_uint32(re->size);
1834     re->options = swap_uint32(re->options);
1835     re->flags = swap_uint16(re->flags);
1836     re->top_bracket = swap_uint16(re->top_bracket);
1837     re->top_backref = swap_uint16(re->top_backref);
1838     re->first_char = swap_uint16(re->first_char);
1839     re->req_char = swap_uint16(re->req_char);
1840     re->name_table_offset = swap_uint16(re->name_table_offset);
1841     re->name_entry_size = swap_uint16(re->name_entry_size);
1842     re->name_count = swap_uint16(re->name_count);
1843    
1844     if (extra != NULL)
1845     {
1846     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1847     rsd->size = swap_uint32(rsd->size);
1848     rsd->flags = swap_uint32(rsd->flags);
1849     rsd->minlength = swap_uint32(rsd->minlength);
1850     }
1851    
1852     /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1853     in the name table, if present, and then in the pattern itself. */
1854    
1855     #ifdef SUPPORT_PCRE16
1856     if (!use_pcre16) return;
1857    
1858     while(TRUE)
1859     {
1860     /* Swap previous characters. */
1861     while (length-- > 0)
1862     {
1863     *ptr = swap_uint16(*ptr);
1864     ptr++;
1865     }
1866     #ifdef SUPPORT_UTF
1867     if (utf16_char)
1868     {
1869     if ((ptr[-1] & 0xfc00) == 0xd800)
1870     {
1871     /* We know that there is only one extra character in UTF-16. */
1872     *ptr = swap_uint16(*ptr);
1873     ptr++;
1874     }
1875     }
1876     utf16_char = FALSE;
1877     #endif /* SUPPORT_UTF */
1878    
1879     /* Get next opcode. */
1880    
1881     length = 0;
1882     op = *ptr;
1883     *ptr++ = swap_uint16(op);
1884    
1885     switch (op)
1886     {
1887     case OP_END:
1888     return;
1889    
1890     #ifdef SUPPORT_UTF
1891     case OP_CHAR:
1892     case OP_CHARI:
1893     case OP_NOT:
1894     case OP_NOTI:
1895     case OP_STAR:
1896     case OP_MINSTAR:
1897     case OP_PLUS:
1898     case OP_MINPLUS:
1899     case OP_QUERY:
1900     case OP_MINQUERY:
1901     case OP_UPTO:
1902     case OP_MINUPTO:
1903     case OP_EXACT:
1904     case OP_POSSTAR:
1905     case OP_POSPLUS:
1906     case OP_POSQUERY:
1907     case OP_POSUPTO:
1908     case OP_STARI:
1909     case OP_MINSTARI:
1910     case OP_PLUSI:
1911     case OP_MINPLUSI:
1912     case OP_QUERYI:
1913     case OP_MINQUERYI:
1914     case OP_UPTOI:
1915     case OP_MINUPTOI:
1916     case OP_EXACTI:
1917     case OP_POSSTARI:
1918     case OP_POSPLUSI:
1919     case OP_POSQUERYI:
1920     case OP_POSUPTOI:
1921     case OP_NOTSTAR:
1922     case OP_NOTMINSTAR:
1923     case OP_NOTPLUS:
1924     case OP_NOTMINPLUS:
1925     case OP_NOTQUERY:
1926     case OP_NOTMINQUERY:
1927     case OP_NOTUPTO:
1928     case OP_NOTMINUPTO:
1929     case OP_NOTEXACT:
1930     case OP_NOTPOSSTAR:
1931     case OP_NOTPOSPLUS:
1932     case OP_NOTPOSQUERY:
1933     case OP_NOTPOSUPTO:
1934     case OP_NOTSTARI:
1935     case OP_NOTMINSTARI:
1936     case OP_NOTPLUSI:
1937     case OP_NOTMINPLUSI:
1938     case OP_NOTQUERYI:
1939     case OP_NOTMINQUERYI:
1940     case OP_NOTUPTOI:
1941     case OP_NOTMINUPTOI:
1942     case OP_NOTEXACTI:
1943     case OP_NOTPOSSTARI:
1944     case OP_NOTPOSPLUSI:
1945     case OP_NOTPOSQUERYI:
1946     case OP_NOTPOSUPTOI:
1947     if (utf) utf16_char = TRUE;
1948     #endif
1949     /* Fall through. */
1950    
1951     default:
1952     length = OP_lengths16[op] - 1;
1953     break;
1954    
1955     case OP_CLASS:
1956     case OP_NCLASS:
1957     /* Skip the character bit map. */
1958     ptr += 32/sizeof(pcre_uint16);
1959     length = 0;
1960     break;
1961    
1962     case OP_XCLASS:
1963 zherczeg 839 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1964     if (LINK_SIZE > 1)
1965     length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1966     - (1 + LINK_SIZE + 1));
1967     else
1968     length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1969    
1970 ph10 836 /* Reverse the size of the XCLASS instance. */
1971 zherczeg 839 *ptr = swap_uint16(*ptr);
1972 ph10 836 ptr++;
1973     if (LINK_SIZE > 1)
1974     {
1975 zherczeg 839 *ptr = swap_uint16(*ptr);
1976 ph10 836 ptr++;
1977     }
1978    
1979     op = *ptr;
1980     *ptr = swap_uint16(op);
1981 zherczeg 839 ptr++;
1982 ph10 836 if ((op & XCL_MAP) != 0)
1983     {
1984     /* Skip the character bit map. */
1985     ptr += 32/sizeof(pcre_uint16);
1986     length -= 32/sizeof(pcre_uint16);
1987     }
1988     break;
1989     }
1990     }
1991     /* Control should never reach here in 16 bit mode. */
1992     #endif /* SUPPORT_PCRE16 */
1993     }
1994    
1995    
1996    
1997     /*************************************************
1998 nigel 87 * Check match or recursion limit *
1999     *************************************************/
2000    
2001     static int
2002 ph10 836 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2003 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
2004     int flag, unsigned long int *limit, int errnumber, const char *msg)
2005     {
2006     int count;
2007     int min = 0;
2008     int mid = 64;
2009     int max = -1;
2010    
2011     extra->flags |= flag;
2012    
2013     for (;;)
2014     {
2015     *limit = mid;
2016    
2017 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2018 nigel 87 use_offsets, use_size_offsets);
2019    
2020     if (count == errnumber)
2021     {
2022     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023     min = mid;
2024     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2025     }
2026    
2027     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2028     count == PCRE_ERROR_PARTIAL)
2029     {
2030     if (mid == min + 1)
2031     {
2032     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2033     break;
2034     }
2035     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2036     max = mid;
2037     mid = (min + mid)/2;
2038     }
2039     else break; /* Some other error */
2040     }
2041    
2042     extra->flags &= ~flag;
2043     return count;
2044     }
2045    
2046    
2047    
2048     /*************************************************
2049 ph10 227 * Case-independent strncmp() function *
2050     *************************************************/
2051    
2052     /*
2053     Arguments:
2054     s first string
2055     t second string
2056     n number of characters to compare
2057    
2058     Returns: < 0, = 0, or > 0, according to the comparison
2059     */
2060    
2061     static int
2062 ph10 836 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2063 ph10 227 {
2064     while (n--)
2065     {
2066     int c = tolower(*s++) - tolower(*t++);
2067     if (c) return c;
2068     }
2069     return 0;
2070     }
2071    
2072    
2073    
2074     /*************************************************
2075 nigel 91 * Check newline indicator *
2076     *************************************************/
2077    
2078 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2079     a message and return 0 if there is no match.
2080 nigel 91
2081     Arguments:
2082     p points after the leading '<'
2083     f file for error message
2084    
2085     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2086     */
2087    
2088     static int
2089 ph10 836 check_newline(pcre_uint8 *p, FILE *f)
2090 nigel 91 {
2091 ph10 836 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2092     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2093     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2094     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2095     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2096     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2097     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2098 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
2099     return 0;
2100     }
2101    
2102    
2103    
2104     /*************************************************
2105 nigel 93 * Usage function *
2106     *************************************************/
2107    
/* Write the command-line help text to the standard output. Which lines appear
depends on the build: the readline sentence on SUPPORT_LIBREADLINE, the -16
line on SUPPORT_PCRE16, the -dfa line on NODFA not being defined, and the -p
line on NOPOSIX not being defined. None of the text contains a conversion
specification, so it is written with fputs(). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n"
      " linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -s++ ditto, verifying when JIT was actually used\n"
      " -s+n force each pattern to be studied, using JIT if available,\n"
      " where 1 <= n <= 7 selects JIT options\n"
      " -s++n ditto, verifying when JIT was actually used\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2158    
2159    
2160    
2161     /*************************************************
2162 nigel 63 * Main Program *
2163     *************************************************/
2164 nigel 43
2165 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
2166     consist of a regular expression, in delimiters and optionally followed by
2167     options, followed by a set of test data, terminated by an empty line. */
2168    
2169     int main(int argc, char **argv)
2170     {
2171     FILE *infile = stdin;
2172 ph10 836 const char *version;
2173 nigel 3 int options = 0;
2174     int study_options = 0;
2175 ph10 386 int default_find_match_limit = FALSE;
2176 nigel 3 int op = 1;
2177     int timeit = 0;
2178 nigel 93 int timeitm = 0;
2179 nigel 3 int showinfo = 0;
2180 nigel 31 int showstore = 0;
2181 ph10 667 int force_study = -1;
2182     int force_study_options = 0;
2183 nigel 87 int quiet = 0;
2184 nigel 53 int size_offsets = 45;
2185     int size_offsets_max;
2186 nigel 77 int *offsets = NULL;
2187 nigel 53 #if !defined NOPOSIX
2188 nigel 3 int posix = 0;
2189 nigel 53 #endif
2190 nigel 3 int debug = 0;
2191 nigel 11 int done = 0;
2192 nigel 77 int all_use_dfa = 0;
2193 ph10 922 int verify_jit = 0;
2194 nigel 77 int yield = 0;
2195 nigel 91 int stack_size;
2196 nigel 3
2197 ph10 667 pcre_jit_stack *jit_stack = NULL;
2198    
2199 ph10 836 /* These vectors store, end-to-end, a list of zero-terminated captured
2200     substring names, each list itself being terminated by an empty name. Assume
2201     that 1024 is plenty long enough for the few names we'll be testing. It is
2202     easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2203 ph10 881 for the actual memory, to ensure alignment. */
2204 ph10 667
2205 ph10 836 pcre_uint16 copynames[1024];
2206     pcre_uint16 getnames[1024];
2207 nigel 69
2208 ph10 881 #ifdef SUPPORT_PCRE16
2209 ph10 836 pcre_uint16 *cn16ptr;
2210     pcre_uint16 *gn16ptr;
2211 ph10 881 #endif
2212 nigel 91
2213 ph10 881 #ifdef SUPPORT_PCRE8
2214 ph10 836 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2215     pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2216     pcre_uint8 *cn8ptr;
2217     pcre_uint8 *gn8ptr;
2218 ph10 881 #endif
2219 nigel 91
2220 ph10 836 /* Get buffers from malloc() so that valgrind will check their misuse when
2221     debugging. They grow automatically when very long lines are read. The 16-bit
2222     buffer (buffer16) is obtained only if needed. */
2223 nigel 69
2224 ph10 836 buffer = (pcre_uint8 *)malloc(buffer_size);
2225     dbuffer = (pcre_uint8 *)malloc(buffer_size);
2226     pbuffer = (pcre_uint8 *)malloc(buffer_size);
2227 nigel 69
2228 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
2229 nigel 3
2230 nigel 93 outfile = stdout;
2231    
2232     /* The following _setmode() stuff is some Windows magic that tells its runtime
2233     library to translate CRLF into a single LF character. At least, that's what
2234     I've been told: never having used Windows I take this all on trust. Originally
2235     it set 0x8000, but then I was advised that _O_BINARY was better. */
2236    
2237 nigel 75 #if defined(_WIN32) || defined(WIN32)
2238 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
2239     #endif
2240 nigel 75
2241 ph10 836 /* Get the version number: both pcre_version() and pcre16_version() give the
2242     same answer. We just need to ensure that we call one that is available. */
2243    
2244     #ifdef SUPPORT_PCRE8
2245     version = pcre_version();
2246     #else
2247     version = pcre16_version();
2248     #endif
2249    
2250 nigel 3 /* Scan options */
2251    
2252     while (argc > 1 && argv[op][0] == '-')
2253     {
2254 ph10 836 pcre_uint8 *endptr;
2255 ph10 922 char *arg = argv[op];
2256 nigel 53
2257 ph10 922 if (strcmp(arg, "-m") == 0) showstore = 1;
2258     else if (strcmp(arg, "-s") == 0) force_study = 0;
2259    
2260     else if (strncmp(arg, "-s+", 3) == 0)
2261 ph10 667 {
2262 ph10 922 arg += 3;
2263     if (*arg == '+') { arg++; verify_jit = TRUE; }
2264 ph10 667 force_study = 1;
2265 ph10 923 if (*arg == 0)
2266     force_study_options = jit_study_bits[6];
2267     else if (*arg >= '1' && *arg <= '7')
2268     force_study_options = jit_study_bits[*arg - '1'];
2269     else goto BAD_ARG;
2270 ph10 691 }
2271 ph10 922 else if (strcmp(arg, "-16") == 0)
2272 ph10 836 {
2273     #ifdef SUPPORT_PCRE16
2274     use_pcre16 = 1;
2275     #else
2276     printf("** This version of PCRE was built without 16-bit support\n");
2277     exit(1);
2278     #endif
2279     }
2280 ph10 922 else if (strcmp(arg, "-q") == 0) quiet = 1;
2281     else if (strcmp(arg, "-b") == 0) debug = 1;
2282     else if (strcmp(arg, "-i") == 0) showinfo = 1;
2283     else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2284     else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2285 nigel 79 #if !defined NODFA
2286 ph10 922 else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2287 nigel 79 #endif
2288 ph10 922 else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2289 ph10 836 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2290 nigel 65 *endptr == 0))
2291 nigel 53 {
2292     op++;
2293     argc--;
2294     }
2295 ph10 922 else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2296 nigel 93 {
2297 ph10 922 int both = arg[2] == 0;
2298 nigel 93 int temp;
2299 ph10 836 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2300 nigel 93 *endptr == 0))
2301     {
2302     timeitm = temp;
2303     op++;
2304     argc--;
2305     }
2306     else timeitm = LOOPREPEAT;
2307     if (both) timeit = timeitm;
2308     }
2309 ph10 922 else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2310 ph10 836 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2311 nigel 91 *endptr == 0))
2312     {
2313 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2314 nigel 91 printf("PCRE: -S not supported on this OS\n");
2315     exit(1);
2316     #else
2317     int rc;
2318     struct rlimit rlim;
2319     getrlimit(RLIMIT_STACK, &rlim);
2320     rlim.rlim_cur = stack_size * 1024 * 1024;
2321     rc = setrlimit(RLIMIT_STACK, &rlim);
2322     if (rc != 0)
2323     {
2324     printf("PCRE: setrlimit() failed with error %d\n", rc);
2325     exit(1);
2326     }
2327     op++;
2328     argc--;
2329     #endif
2330     }
2331 nigel 53 #if !defined NOPOSIX
2332 ph10 922 else if (strcmp(arg, "-p") == 0) posix = 1;
2333 nigel 53 #endif
2334 ph10 922 else if (strcmp(arg, "-C") == 0)
2335 nigel 63 {
2336     int rc;
2337 ph10 392 unsigned long int lrc;
2338 ph10 836
2339     if (argc > 2)
2340     {
2341     if (strcmp(argv[op + 1], "linksize") == 0)
2342     {
2343     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2344     printf("%d\n", rc);
2345     yield = rc;
2346     goto EXIT;
2347     }
2348     if (strcmp(argv[op + 1], "pcre8") == 0)
2349     {
2350     #ifdef SUPPORT_PCRE8
2351     printf("1\n");
2352     yield = 1;
2353     #else
2354     printf("0\n");
2355     yield = 0;
2356     #endif
2357     goto EXIT;
2358     }
2359     if (strcmp(argv[op + 1], "pcre16") == 0)
2360     {
2361     #ifdef SUPPORT_PCRE16
2362     printf("1\n");
2363     yield = 1;
2364     #else
2365     printf("0\n");
2366     yield = 0;
2367     #endif
2368     goto EXIT;
2369     }
2370     if (strcmp(argv[op + 1], "utf") == 0)
2371     {
2372     #ifdef SUPPORT_PCRE8
2373     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2374     printf("%d\n", rc);
2375     yield = rc;
2376     #else
2377     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2378     printf("%d\n", rc);
2379     yield = rc;
2380     #endif
2381     goto EXIT;
2382     }
2383     if (strcmp(argv[op + 1], "ucp") == 0)
2384     {
2385     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2386     printf("%d\n", rc);
2387     yield = rc;
2388     goto EXIT;
2389     }
2390     if (strcmp(argv[op + 1], "jit") == 0)
2391     {
2392     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2393     printf("%d\n", rc);
2394     yield = rc;
2395     goto EXIT;
2396     }
2397 ph10 838 if (strcmp(argv[op + 1], "newline") == 0)
2398 ph10 842 {
2399 ph10 838 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2400     /* Note that these values are always the ASCII values, even
2401     in EBCDIC environments. CR is 13 and NL is 10. */
2402     printf("%s\n", (rc == 13)? "CR" :
2403     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2404     (rc == -2)? "ANYCRLF" :
2405     (rc == -1)? "ANY" : "???");
2406     goto EXIT;
2407 ph10 842 }
2408 ph10 838 printf("Unknown -C option: %s\n", argv[op + 1]);
2409 ph10 836 goto EXIT;
2410     }
2411    
2412     printf("PCRE version %s\n", version);
2413 nigel 63 printf("Compiled with\n");
2414 ph10 836
2415     /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2416     are set, either both UTFs are supported or both are not supported. */
2417    
2418     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2419     printf(" 8-bit and 16-bit support\n");
2420 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2421 ph10 836 if (rc)
2422     printf(" UTF-8 and UTF-16 support\n");
2423     else
2424     printf(" No UTF-8 or UTF-16 support\n");
2425     #elif defined SUPPORT_PCRE8
2426     printf(" 8-bit support only\n");
2427     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2428 nigel 63 printf(" %sUTF-8 support\n", rc? "" : "No ");
2429 ph10 836 #else
2430     printf(" 16-bit support only\n");
2431     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2432     printf(" %sUTF-16 support\n", rc? "" : "No ");
2433     #endif
2434    
2435     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2436 nigel 75 printf(" %sUnicode properties support\n", rc? "" : "No ");
2437 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2438 ph10 674 if (rc)
2439 ph10 890 {
2440     const char *arch;
2441 ph10 908 (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2442 ph10 890 printf(" Just-in-time compiler support: %s\n", arch);
2443 ph10 903 }
2444 ph10 674 else
2445     printf(" No just-in-time compiler support\n");
2446 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2447 ph10 391 /* Note that these values are always the ASCII values, even
2448 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
2449 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2450     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2451 ph10 150 (rc == -2)? "ANYCRLF" :
2452 nigel 93 (rc == -1)? "ANY" : "???");
2453 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2454 ph10 231 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2455     "all Unicode newlines");
2456 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2457 nigel 63 printf(" Internal link size = %d\n", rc);
2458 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2459 nigel 63 printf(" POSIX malloc threshold = %d\n", rc);
2460 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2461 ph10 376 printf(" Default match limit = %ld\n", lrc);
2462 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2463 ph10 376 printf(" Default recursion depth limit = %ld\n", lrc);
2464 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2465 ph10 895 printf(" Match recursion uses %s", rc? "stack" : "heap");
2466     if (showstore)
2467 ph10 903 {
2468 ph10 901 PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2469 ph10 903 printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2470 ph10 895 }
2471 ph10 903 printf("\n");
2472 ph10 121 goto EXIT;
2473 nigel 63 }
2474 ph10 922 else if (strcmp(arg, "-help") == 0 ||
2475     strcmp(arg, "--help") == 0)
2476 nigel 93 {
2477     usage();
2478     goto EXIT;
2479     }
2480 nigel 3 else
2481     {
2482 ph10 922 BAD_ARG:
2483     printf("** Unknown or malformed option %s\n", arg);
2484 nigel 93 usage();
2485 nigel 77 yield = 1;
2486     goto EXIT;
2487 nigel 3 }
2488     op++;
2489     argc--;
2490     }
2491    
2492 nigel 53 /* Get the store for the offsets vector, and remember what it was */
2493    
2494     size_offsets_max = size_offsets;
2495 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2496 nigel 53 if (offsets == NULL)
2497     {
2498     printf("** Failed to get %d bytes of memory for offsets vector\n",
2499 ph10 151 (int)(size_offsets_max * sizeof(int)));
2500 nigel 77 yield = 1;
2501     goto EXIT;
2502 nigel 53 }
2503    
2504 nigel 3 /* Sort out the input and output files */
2505    
2506     if (argc > 1)
2507     {
2508 nigel 93 infile = fopen(argv[op], INPUT_MODE);
2509 nigel 3 if (infile == NULL)
2510     {
2511     printf("** Failed to open %s\n", argv[op]);
2512 nigel 77 yield = 1;
2513     goto EXIT;
2514 nigel 3 }
2515     }
2516    
2517     if (argc > 2)
2518     {
2519 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
2520 nigel 3 if (outfile == NULL)
2521     {
2522     printf("** Failed to open %s\n", argv[op+1]);
2523 nigel 77 yield = 1;
2524     goto EXIT;
2525 nigel 3 }
2526     }
2527    
2528     /* Set alternative malloc function */
2529    
2530 ph10 836 #ifdef SUPPORT_PCRE8
2531 nigel 3 pcre_malloc = new_malloc;
2532 nigel 73 pcre_free = new_free;
2533     pcre_stack_malloc = stack_malloc;
2534     pcre_stack_free = stack_free;
2535 ph10 836 #endif
2536 nigel 3
2537 ph10 836 #ifdef SUPPORT_PCRE16
2538     pcre16_malloc = new_malloc;
2539     pcre16_free = new_free;
2540     pcre16_stack_malloc = stack_malloc;
2541     pcre16_stack_free = stack_free;
2542     #endif
2543    
2544 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
2545 nigel 3
2546 ph10 836 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2547 nigel 3
2548     /* Main loop */
2549    
2550 nigel 11 while (!done)
2551 nigel 3 {
2552     pcre *re = NULL;
2553     pcre_extra *extra = NULL;
2554 nigel 37
2555     #if !defined NOPOSIX /* There are still compilers that require no indent */
2556 nigel 3 regex_t preg;
2557 nigel 45 int do_posix = 0;
2558 nigel 37 #endif
2559    
2560 nigel 7 const char *error;
2561 ph10 836 pcre_uint8 *markptr;
2562     pcre_uint8 *p, *pp, *ppp;
2563     pcre_uint8 *to_file = NULL;
2564     const pcre_uint8 *tables = NULL;
2565 zherczeg 847 unsigned long int get_options;
2566 nigel 75 unsigned long int true_size, true_study_size = 0;
2567     size_t size, regex_gotten_store;
2568 ph10 654 int do_allcaps = 0;
2569 ph10 512 int do_mark = 0;
2570 nigel 3 int do_study = 0;
2571 ph10 654 int no_force_study = 0;
2572 nigel 25 int do_debug = debug;
2573 nigel 35 int do_G = 0;
2574     int do_g = 0;
2575 nigel 25 int do_showinfo = showinfo;
2576 nigel 35 int do_showrest = 0;
2577 ph10 616 int do_showcaprest = 0;
2578 nigel 75 int do_flip = 0;
2579 nigel 93 int erroroffset, len, delimiter, poffset;
2580 nigel 3
2581 ph10 836 use_utf = 0;
2582 ph10 211 debug_lengths = 1;
2583 nigel 63
2584 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2585 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2586 nigel 63 fflush(outfile);
2587 nigel 3
2588     p = buffer;
2589     while (isspace(*p)) p++;
2590     if (*p == 0) continue;
2591    
2592 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
2593 nigel 3
2594 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2595     {
2596 zherczeg 841 pcre_uint32 magic;
2597 ph10 836 pcre_uint8 sbuf[8];
2598 nigel 75 FILE *f;
2599    
2600     p++;
2601 zherczeg 839 if (*p == '!')
2602     {
2603     do_debug = TRUE;
2604     do_showinfo = TRUE;
2605     p++;
2606     }
2607    
2608 nigel 75 pp = p + (int)strlen((char *)p);
2609     while (isspace(pp[-1])) pp--;
2610     *pp = 0;
2611    
2612     f = fopen((char *)p, "rb");
2613     if (f == NULL)
2614     {
2615     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2616     continue;
2617     }
2618    
2619 zherczeg 839 first_gotten_store = 0;
2620 nigel 75 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2621    
2622     true_size =
2623     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2624     true_study_size =
2625     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2626    
2627 zherczeg 852 re = (pcre *)new_malloc(true_size);
2628 ph10 836 regex_gotten_store = first_gotten_store;
2629 nigel 75
2630     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2631    
2632 zherczeg 852 magic = ((REAL_PCRE *)re)->magic_number;
2633 nigel 75 if (magic != MAGIC_NUMBER)
2634     {
2635 ph10 836 if (swap_uint32(magic) == MAGIC_NUMBER)
2636 nigel 75 {
2637     do_flip = 1;
2638     }
2639     else
2640     {
2641     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2642     fclose(f);
2643     continue;
2644     }
2645     }
2646    
2647 zherczeg 839 /* We hide the byte-invert info for little and big endian tests. */
2648 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2649 zherczeg 839 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2650 nigel 75
2651 ph10 612 /* Now see if there is any following study data. */
2652 nigel 75
2653     if (true_study_size != 0)
2654     {
2655     pcre_study_data *psd;
2656    
2657     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2658     extra->flags = PCRE_EXTRA_STUDY_DATA;
2659    
2660     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2661     extra->study_data = psd;
2662    
2663     if (fread(psd, 1, true_study_size, f) != true_study_size)
2664     {
2665     FAIL_READ:
2666     fprintf(outfile, "Failed to read data from %s\n", p);
2667 ph10 836 if (extra != NULL)
2668     {
2669     PCRE_FREE_STUDY(extra);
2670     }
2671 nigel 75 if (re != NULL) new_free(re);
2672     fclose(f);
2673     continue;
2674     }
2675     fprintf(outfile, "Study data loaded from %s\n", p);
2676     do_study = 1; /* To get the data output if requested */
2677     }
2678     else fprintf(outfile, "No study data\n");
2679    
2680 ph10 836 /* Flip the necessary bytes. */
2681     if (do_flip)
2682     {
2683 zherczeg 839 int rc;
2684     PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2685     if (rc == PCRE_ERROR_BADMODE)
2686     {
2687     /* Simulate the result of the function call below. */
2688     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2689     use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2690     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2691     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2692     continue;
2693     }
2694 ph10 836 }
2695    
2696     /* Need to know if UTF-8 for printing data strings. */
2697    
2698     if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2699     use_utf = (get_options & PCRE_UTF8) != 0;
2700    
2701 nigel 75 fclose(f);
2702     goto SHOW_INFO;
2703     }
2704    
2705     /* In-line pattern (the usual case). Get the delimiter and seek the end of
2706 ph10 836 the pattern; if it isn't complete, read more. */
2707 nigel 75
2708 nigel 3 delimiter = *p++;
2709    
2710 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
2711 nigel 3 {
2712 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2713 nigel 3 goto SKIP_DATA;
2714     }
2715    
2716     pp = p;
2717 ph10 530 poffset = (int)(p - buffer);
2718 nigel 3
2719     for(;;)
2720     {
2721 nigel 29 while (*pp != 0)
2722     {
2723     if (*pp == '\\' && pp[1] != 0) pp++;
2724     else if (*pp == delimiter) break;
2725     pp++;
2726     }
2727 nigel 3 if (*pp != 0) break;
2728 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2729 nigel 3 {
2730     fprintf(outfile, "** Unexpected EOF\n");
2731 nigel 11 done = 1;
2732     goto CONTINUE;
2733 nigel 3 }
2734 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2735 nigel 3 }
2736    
2737 nigel 93 /* The buffer may have moved while being extended; reset the start of data
2738     pointer to the correct relative point in the buffer. */
2739    
2740     p = buffer + poffset;
2741    
2742 nigel 29 /* If the first character after the delimiter is backslash, make
2743     the pattern end with backslash. This is purely to provide a way
2744     of testing for the error message when a pattern ends with backslash. */
2745    
2746     if (pp[1] == '\\') *pp++ = '\\';
2747    
2748 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2749     for callouts. */
2750 nigel 3
2751     *pp++ = 0;
2752 nigel 75 strcpy((char *)pbuffer, (char *)p);
2753 nigel 3
2754     /* Look for options after final delimiter */
2755    
2756     options = 0;
2757 ph10 836 study_options = 0;
2758 nigel 31 log_store = showstore; /* default from command line */
2759    
2760 nigel 3 while (*pp != 0)
2761     {
2762     switch (*pp++)
2763     {
2764 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
2765 nigel 35 case 'g': do_g = 1; break;
2766 nigel 3 case 'i': options |= PCRE_CASELESS; break;
2767     case 'm': options |= PCRE_MULTILINE; break;
2768     case 's': options |= PCRE_DOTALL; break;
2769     case 'x': options |= PCRE_EXTENDED; break;
2770 nigel 25
2771 ph10 616 case '+':
2772 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2773 ph10 616 break;
2774 ph10 654
2775     case '=': do_allcaps = 1; break;
2776 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
2777 nigel 93 case 'B': do_debug = 1; break;
2778 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2779 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
2780 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2781 nigel 75 case 'F': do_flip = 1; break;
2782 nigel 35 case 'G': do_G = 1; break;
2783 nigel 25 case 'I': do_showinfo = 1; break;
2784 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
2785 ph10 512 case 'K': do_mark = 1; break;
2786 nigel 31 case 'M': log_store = 1; break;
2787 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2788 nigel 37
2789     #if !defined NOPOSIX
2790 nigel 3 case 'P': do_posix = 1; break;
2791 nigel 37 #endif
2792    
2793 ph10 654 case 'S':
2794 ph10 691 if (do_study == 0)
2795 ph10 612 {
2796 ph10 691 do_study = 1;
2797 ph10 667 if (*pp == '+')
2798     {
2799 ph10 922 if (*(++pp) == '+')
2800     {
2801     verify_jit = TRUE;
2802     pp++;
2803     }
2804 ph10 923 if (*pp >= '1' && *pp <= '7')
2805     study_options |= jit_study_bits[*pp++ - '1'];
2806     else
2807     study_options |= jit_study_bits[6];
2808 ph10 691 }
2809     }
2810 ph10 667 else
2811     {
2812 ph10 612 do_study = 0;
2813     no_force_study = 1;
2814 ph10 654 }
2815 ph10 612 break;
2816    
2817 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
2818 ph10 535 case 'W': options |= PCRE_UCP; break;
2819 nigel 3 case 'X': options |= PCRE_EXTRA; break;
2820 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2821 ph10 126 case 'Z': debug_lengths = 0; break;
2822 ph10 836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2823 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2824 ph10 545
2825 ph10 541 case 'T':
2826     switch (*pp++)
2827     {
2828     case '0': tables = tables0; break;
2829     case '1': tables = tables1; break;
2830 ph10 545
2831 ph10 541 case '\r':
2832     case '\n':
2833 ph10 545 case ' ':
2834     case 0:
2835 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
2836 ph10 545 goto SKIP_DATA;
2837    
2838     default:
2839 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2840 ph10 545 goto SKIP_DATA;
2841 ph10 541 }
2842 ph10 545 break;
2843 nigel 25
2844     case 'L':
2845     ppp = pp;
2846 nigel 93 /* The '\r' test here is so that it works on Windows. */
2847     /* The 0 (NUL terminator) test is just in case this is an unterminated line. */
2848     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2849 nigel 25 *ppp = 0;
2850     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2851     {
2852     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2853     goto SKIP_DATA;
2854     }
2855 nigel 93 locale_set = 1;
2856 ph10 836 tables = PCRE_MAKETABLES;
2857 nigel 25 pp = ppp;
2858     break;
2859    
2860 nigel 75 case '>':
2861     to_file = pp;
2862     while (*pp != 0) pp++;
2863     while (isspace(pp[-1])) pp--;
2864     *pp = 0;
2865     break;
2866    
2867 nigel 91 case '<':
2868     {
2869 ph10 836 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2870 ph10 336 {
2871     options |= PCRE_JAVASCRIPT_COMPAT;
2872 ph10 345 pp += 3;
2873 ph10 336 }
2874     else
2875 ph10 345 {
2876 ph10 336 int x = check_newline(pp, outfile);
2877     if (x == 0) goto SKIP_DATA;
2878     options |= x;
2879     while (*pp++ != '>');
2880 ph10 345 }
2881 nigel 91 }
2882     break;
2883    
2884 nigel 77 case '\r': /* So that it works in Windows */
2885     case '\n':
2886     case ' ':
2887     break;
2888 nigel 75
2889 nigel 3 default:
2890     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2891     goto SKIP_DATA;
2892     }
2893     }
2894    
2895 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
2896 nigel 25 timing, showing, or debugging options, nor the ability to pass over
2897 ph10 836 local character tables. Neither does it have 16-bit support. */
2898 nigel 3
2899 nigel 37 #if !defined NOPOSIX
2900 nigel 3 if (posix || do_posix)
2901     {
2902     int rc;
2903     int cflags = 0;
2904 nigel 75
2905 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2906     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2907 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2908 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2909     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2910 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2911 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2912 nigel 87
2913 ph10 836 first_gotten_store = 0;
2914 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
2915    
2916     /* Compilation failed; go back for another re, skipping to blank line
2917     if non-interactive. */
2918    
2919     if (rc != 0)
2920     {
2921 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2922 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2923     goto SKIP_DATA;
2924     }
2925     }
2926    
2927     /* Handle compiling via the native interface */
2928    
2929     else
2930 nigel 37 #endif /* !defined NOPOSIX */
2931    
2932 nigel 3 {
2933 ph10 836 /* In 16-bit mode, convert the input. */
2934    
2935     #ifdef SUPPORT_PCRE16
2936     if (use_pcre16)
2937     {
2938     switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2939     {
2940     case -1:
2941     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2942     "converted to UTF-16\n");
2943     goto SKIP_DATA;
2944    
2945     case -2:
2946     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2947     "cannot be converted to UTF-16\n");
2948     goto SKIP_DATA;
2949 ph10 842
2950 ph10 836 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2951     fprintf(outfile, "**Failed: character value greater than 0xffff "
2952     "cannot be converted to 16-bit in non-UTF mode\n");
2953 ph10 842 goto SKIP_DATA;
2954 ph10 836
2955     default:
2956     break;
2957     }
2958     p = (pcre_uint8 *)buffer16;
2959     }
2960     #endif
2961    
2962     /* Compile many times when timing */
2963    
2964 nigel 93 if (timeit > 0)
2965 nigel 3 {
2966     register int i;
2967     clock_t time_taken;
2968     clock_t start_time = clock();
2969 nigel 93 for (i = 0; i < timeit; i++)
2970 nigel 3 {
2971 ph10 836 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2972 nigel 3 if (re != NULL) free(re);
2973     }
2974     time_taken = clock() - start_time;
2975 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
2976     (((double)time_taken * 1000.0) / (double)timeit) /
2977 nigel 63 (double)CLOCKS_PER_SEC);
2978 nigel 3 }
2979    
2980 ph10 836 first_gotten_store = 0;
2981     PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2982 nigel 3
2983     /* Compilation failed; go back for another re, skipping to blank line
2984     if non-interactive. */
2985    
2986     if (re == NULL)
2987     {
2988     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2989     SKIP_DATA:
2990     if (infile != stdin)
2991     {
2992     for (;;)
2993     {
2994 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
2995 nigel 11 {
2996     done = 1;
2997     goto CONTINUE;
2998     }
2999 nigel 3 len = (int)strlen((char *)buffer);
3000     while (len > 0 && isspace(buffer[len-1])) len--;
3001     if (len == 0) break;
3002     }
3003     fprintf(outfile, "\n");
3004     }
3005 nigel 25 goto CONTINUE;
3006 nigel 3 }
3007 ph10 416
3008     /* Compilation succeeded. It is now possible to set the UTF-8 option from
3009     within the regex; check for this so that we know how to process the data
3010 ph10 412 lines. */
3011 ph10 416
3012 ph10 836 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3013     goto SKIP_DATA;
3014     if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3015 nigel 3
3016 ph10 836 /* Extract the size for possible writing before possibly flipping it,
3017     and remember the store that was got. */
3018 nigel 3
3019 zherczeg 852 true_size = ((REAL_PCRE *)re)->size;
3020 ph10 836 regex_gotten_store = first_gotten_store;
3021    
3022     /* Output code size information if requested */
3023    
3024 nigel 63 if (log_store)
3025     fprintf(outfile, "Memory allocation (code space): %d\n",
3026 ph10 836 (int)(first_gotten_store -
3027 zherczeg 852 sizeof(REAL_PCRE) -
3028     ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3029 nigel 63
3030 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
3031 ph10 654 help with the matching, unless the pattern has the SS option, which
3032 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
3033     never sensible). */
3034 nigel 75
3035 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
3036 nigel 75 {
3037 nigel 93 if (timeit > 0)
3038 nigel 75 {
3039     register int i;
3040     clock_t time_taken;
3041     clock_t start_time = clock();
3042 nigel 93 for (i = 0; i < timeit; i++)
3043 ph10 836 {
3044     PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3045     }
3046 nigel 75 time_taken = clock() - start_time;
3047 ph10 836 if (extra != NULL)
3048     {
3049     PCRE_FREE_STUDY(extra);
3050     }
3051 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
3052     (((double)time_taken * 1000.0) / (double)timeit) /
3053 nigel 75 (double)CLOCKS_PER_SEC);
3054     }
3055 ph10 836 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3056 nigel 75 if (error != NULL)
3057     fprintf(outfile, "Failed to study: %s\n", error);
3058     else if (extra != NULL)
3059 ph10 836 {
3060 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3061 ph10 836 if (log_store)
3062     {
3063     size_t jitsize;
3064     if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3065     jitsize != 0)
3066     fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3067     }
3068     }
3069 nigel 75 }
3070 ph10 788
3071 ph10 510 /* If /K was present, we set up for handling MARK data. */
3072 ph10 512
3073 ph10 510 if (do_mark)
3074     {
3075     if (extra == NULL)
3076     {
3077     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3078     extra->flags = 0;
3079     }
3080 ph10 512 extra->mark = &markptr;
3081 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
3082 ph10 512 }
3083 nigel 75
3084 ph10 836 /* Extract and display information from the compiled data if required. */
3085 nigel 75
3086     SHOW_INFO:
3087    
3088 nigel 93 if (do_debug)
3089     {
3090     fprintf(outfile, "------------------------------------------------------------------\n");
3091 ph10 836 PCRE_PRINTINT(re, outfile, debug_lengths);
3092 nigel 93 }
3093 ph10 416
3094 ph10 412 /* We already have the options in get_options (see above) */
3095 nigel 93
3096 nigel 25 if (do_showinfo)
3097 nigel 3 {
3098 ph10 412 unsigned long int all_options;
3099 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3100 ph10 227 hascrorlf;
3101 nigel 63 int nameentrysize, namecount;
3102 ph10 836 const pcre_uint8 *nametable;
3103 nigel 3
3104 ph10 836 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3105     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3106     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3107     new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3108     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3109     new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3110     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3111     new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3112     new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3113     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3114     new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3115     != 0)
3116     goto SKIP_DATA;
3117 nigel 43
3118 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
3119 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3120 nigel 77 (int)size, (int)regex_gotten_store);
3121 nigel 43
3122     fprintf(outfile, "Capturing subpattern count = %d\n", count);
3123     if (backrefmax > 0)
3124     fprintf(outfile, "Max back reference = %d\n", backrefmax);
3125 nigel 63
3126     if (namecount > 0)
3127     {
3128     fprintf(outfile, "Named capturing subpatterns:\n");
3129     while (namecount-- > 0)
3130     {
3131 ph10 836 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3132     int imm2_size = use_pcre16 ? 1 : 2;
3133     #else
3134     int imm2_size = IMM2_SIZE;
3135     #endif
3136     int length = (int)STRLEN(nametable + imm2_size);
3137     fprintf(outfile, " ");
3138     PCHARSV(nametable, imm2_size, length, outfile);
3139     while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3140     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3141     fprintf(outfile, "%3d\n", use_pcre16?
3142     (int)(((PCRE_SPTR16)nametable)[0])
3143     :((int)nametable[0] << 8) | (int)nametable[1]);
3144     nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3145     #else
3146     fprintf(outfile, "%3d\n", GET2(nametable, 0));
3147     #ifdef SUPPORT_PCRE8
3148 nigel 63 nametable += nameentrysize;
3149 ph10 836 #else
3150     nametable += nameentrysize * 2;
3151     #endif
3152     #endif
3153 nigel 63 }
3154     }
3155 ph10 172
3156 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3157 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3158 nigel 63
3159 zherczeg 852 all_options = ((REAL_PCRE *)re)->options;
3160 ph10 836 if (do_flip) all_options = swap_uint32(all_options);
3161 nigel 75
3162 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
3163 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3164 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3165     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3166     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3167     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3168 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3169 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3170 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3171     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3172 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3173     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3174     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3175 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3176 ph10 836 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3177 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3178 ph10 836 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3179 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3180 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3181 ph10 172
3182 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3183 nigel 43
3184 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
3185 nigel 91 {
3186     case PCRE_NEWLINE_CR:
3187     fprintf(outfile, "Forced newline sequence: CR\n");
3188     break;
3189 nigel 43
3190 nigel 91 case PCRE_NEWLINE_LF:
3191     fprintf(outfile, "Forced newline sequence: LF\n");
3192     break;
3193    
3194     case PCRE_NEWLINE_CRLF:
3195     fprintf(outfile, "Forced newline sequence: CRLF\n");
3196     break;
3197    
3198 ph10 149 case PCRE_NEWLINE_ANYCRLF:
3199     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3200     break;
3201    
3202 nigel 93 case PCRE_NEWLINE_ANY:
3203     fprintf(outfile, "Forced newline sequence: ANY\n");
3204     break;
3205    
3206 nigel 91 default:
3207     break;
3208     }
3209    
3210 nigel 43 if (first_char == -1)
3211     {
3212 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
3213 nigel 43 }
3214     else if (first_char < 0)
3215     {
3216     fprintf(outfile, "No first char\n");
3217     }
3218     else
3219     {
3220 ph10 836 const char *caseless =
3221 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3222 nigel 63 "" : " (caseless)";
3223 ph10 836
3224     if (PRINTOK(first_char))
3225     fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3226 nigel 3 else
3227 ph10 836 {
3228     fprintf(outfile, "First char = ");
3229     pchar(first_char, outfile);
3230     fprintf(outfile, "%s\n", caseless);
3231     }
3232 nigel 43 }
3233 nigel 37
3234 nigel 43 if (need_char < 0)
3235     {
3236     fprintf(outfile, "No need char\n");
3237 nigel 3 }
3238 nigel 43 else
3239     {
3240 ph10 836 const char *caseless =
3241 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3242 nigel 63 "" : " (caseless)";
3243 ph10 836
3244     if (PRINTOK(need_char))
3245     fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3246 nigel 43 else
3247 ph10 836 {
3248     fprintf(outfile, "Need char = ");
3249     pchar(need_char, outfile);
3250     fprintf(outfile, "%s\n", caseless);
3251     }
3252 nigel 43 }
3253 nigel 75
3254     /* Don't output study size; at present it is in any case a fixed
3255     value, but it varies, depending on the computer architecture, and
3256     so messes up the test suite. (And with the /F option, it might be
3257 ph10 654 flipped.) If study was forced by an external -s, don't show this
3258 ph10 612 information unless -i or -d was also present. This means that, except
3259     when auto-callouts are involved, the output from runs with and without
3260     -s should be identical. */
3261 nigel 75
3262 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3263 nigel 75 {
3264     if (extra == NULL)
3265     fprintf(outfile, "Study returned NULL\n");
3266     else
3267     {
3268 ph10 836 pcre_uint8 *start_bits = NULL;
3269 ph10 455 int minlength;
3270 ph10 461
3271 ph10 836 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3272     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3273 ph10 461
3274 ph10 836 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3275 nigel 75 {
3276 ph10 836 if (start_bits == NULL)
3277     fprintf(outfile, "No set of starting bytes\n");
3278     else
3279 nigel 75 {
3280 ph10 836 int i;
3281     int c = 24;
3282     fprintf(outfile, "Starting byte set: ");
3283     for (i = 0; i < 256; i++)
3284 nigel 75 {
3285 ph10 836 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3286 nigel 75 {
3287 ph10 836 if (c > 75)
3288     {
3289     fprintf(outfile, "\n ");
3290     c = 2;
3291     }
3292     if (PRINTOK(i) && i != ' ')
3293     {
3294     fprintf(outfile, "%c ", i);
3295     c += 2;
3296     }
3297     else
3298     {
3299     fprintf(outfile, "\\x%02x ", i);
3300     c += 5;
3301     }
3302 nigel 75 }
3303     }
3304 ph10 836 fprintf(outfile, "\n");
3305 nigel 75 }
3306     }
3307     }
3308 ph10 691
3309 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
3310 ph10 691
3311 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3312     {
3313 ph10 691 int jit;
3314 ph10 836 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3315     {
3316     if (jit)
3317     fprintf(outfile, "JIT study was successful\n");
3318     else
3319 ph10 691 #ifdef SUPPORT_JIT
3320 ph10 836 fprintf(outfile, "JIT study was not successful\n");
3321 ph10 667 #else
3322 ph10 836 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3323 ph10 667 #endif
3324 ph10 836 }
3325 ph10 691 }
3326 nigel 75 }
3327 nigel 3 }
3328    
3329 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
3330     that is all. The first 8 bytes of the file are the regex length and then
3331     the study length, in big-endian order. */
3332 nigel 3
3333 nigel 75 if (to_file != NULL)
3334 nigel 3 {
3335 nigel 75 FILE *f = fopen((char *)to_file, "wb");
3336     if (f == NULL)
3337 nigel 3 {
3338 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3339 nigel 3 }
3340 nigel 75 else
3341     {
3342 ph10 836 pcre_uint8 sbuf[8];
3343 ph10 259
3344 ph10 836 if (do_flip) regexflip(re, extra);
3345     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3346     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3347     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3348     sbuf[3] = (pcre_uint8)((true_size) & 255);
3349     sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3350     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3351     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3352     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3353 nigel 3
3354 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
3355     fwrite(re, 1, true_size, f) < true_size)
3356     {
3357     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3358     }
3359 nigel 3 else
3360     {
3361 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3362 ph10 654
3363 ph10 658 /* If there is study data, write it. */
3364 ph10 654
3365 nigel 75 if (extra != NULL)
3366 nigel 3 {
3367 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
3368     true_study_size)
3369 nigel 3 {
3370 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
3371     strerror(errno));
3372 nigel 3 }
3373 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
3374 nigel 3 }
3375     }
3376 nigel 75 fclose(f);
3377 nigel 3 }
3378 nigel 77
3379     new_free(re);
3380 ph10 836 if (extra != NULL)
3381     {
3382     PCRE_FREE_STUDY(extra);
3383     }
3384 ph10 545 if (locale_set)
3385 ph10 541 {
3386     new_free((void *)tables);
3387     setlocale(LC_CTYPE, "C");
3388 ph10 545 locale_set = 0;
3389     }
3390 nigel 75 continue; /* With next regex */
3391 nigel 3 }
3392 nigel 75 } /* End of non-POSIX compile */
3393 nigel 3
3394     /* Read data lines and test them */
3395    
3396     for (;;)
3397     {
3398 ph10 836 pcre_uint8 *q;
3399     pcre_uint8 *bptr;
3400 nigel 57 int *use_offsets = offsets;
3401 nigel 53 int use_size_offsets = size_offsets;
3402 nigel 63 int callout_data = 0;
3403     int callout_data_set = 0;
3404 nigel 3 int count, c;
3405 nigel 29 int copystrings = 0;
3406 ph10 386 int find_match_limit = default_find_match_limit;
3407 nigel 29 int getstrings = 0;
3408     int getlist = 0;
3409 nigel 39 int gmatched = 0;
3410 nigel 35 int start_offset = 0;
3411 ph10 579 int start_offset_sign = 1;
3412 nigel 41 int g_notempty = 0;
3413 nigel 77 int use_dfa = 0;
3414 ph10 922 int jit_was_used = 0;
3415 nigel 3
3416 nigel 91 *copynames = 0;
3417     *getnames = 0;
3418    
3419 ph10 881 #ifdef SUPPORT_PCRE16
3420 ph10 836 cn16ptr = copynames;
3421     gn16ptr = getnames;
3422 ph10 881 #endif
3423 ph10 903 #ifdef SUPPORT_PCRE8
3424 ph10 836 cn8ptr = copynames8;
3425     gn8ptr = getnames8;
3426 ph10 903 #endif
3427 nigel 91
3428 ph10 836 SET_PCRE_CALLOUT(callout);
3429 nigel 63 first_callout = 1;
3430 ph10 654 last_callout_mark = NULL;
3431 nigel 63 callout_extra = 0;
3432     callout_count = 0;
3433     callout_fail_count = 999999;
3434     callout_fail_id = -1;
3435 nigel 73 show_malloc = 0;
3436 ph10 836 options = 0;
3437 nigel 63
3438 nigel 91 if (extra != NULL) extra->flags &=
3439     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3440    
3441     len = 0;
3442     for (;;)
3443 nigel 11 {
3444 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3445 nigel 91 {
3446 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
3447     {
3448 ph10 545 fprintf(outfile, "\n");
3449 ph10 537 break;
3450 ph10 545 }
3451 nigel 91 done = 1;
3452     goto CONTINUE;
3453     }
3454     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3455     len = (int)strlen((char *)buffer);
3456     if (buffer[len-1] == '\n') break;
3457 nigel 11 }
3458 nigel 3
3459     while (len > 0 && isspace(buffer[len-1])) len--;
3460     buffer[len] = 0;
3461     if (len == 0) break;
3462    
3463     p = buffer;
3464     while (isspace(*p)) p++;
3465    
3466 ph10 147 bptr = q = dbuffer;
3467 nigel 3 while ((c = *p++) != 0)
3468     {
3469     int i = 0;
3470     int n = 0;
3471 ph10 842
3472 ph10 836 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3473     In non-UTF mode, allow the value of the byte to fall through to later,
3474     where values greater than 127 are turned into UTF-8 when running in
3475     16-bit mode. */
3476 ph10 842
3477 ph10 836 if (c != '\\')
3478 nigel 3 {
3479 ph10 836 if (use_utf)
3480     {
3481     *q++ = c;
3482     continue;
3483 ph10 842 }
3484     }
3485    
3486 ph10 836 /* Handle backslash escapes */
3487 ph10 842
3488 ph10 836 else switch ((c = *p++))
3489     {
3490 nigel 3 case 'a': c = 7; break;
3491     case 'b': c = '\b'; break;
3492     case 'e': c = 27; break;
3493     case 'f': c = '\f'; break;
3494     case 'n': c = '\n'; break;
3495     case 'r': c = '\r'; break;
3496     case 't': c = '\t'; break;
3497     case 'v': c = '\v'; break;
3498    
3499     case '0': case '1': case '2': case '3':
3500     case '4': case '5': case '6': case '7':
3501     c -= '0';
3502     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3503     c = c * 8 + *p++ - '0';
3504     break;
3505    
3506     case 'x':
3507 nigel 49 if (*p == '{')
3508     {
3509 ph10 836 pcre_uint8 *pt = p;
3510 nigel 49 c = 0;
3511 ph10 738
3512 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3513     when isxdigit() is a macro that refers to its argument more than
3514     once. This is banned by the C Standard, but apparently happens in at
3515     least one MacOS environment. */
3516 ph10 738
3517 ph10 735 for (pt++; isxdigit(*pt); pt++)
3518 ph10 862 {
3519     if (++i == 9)
3520     fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3521 ph10 903 "using only the first eight.\n");
3522 ph10 862 else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3523 ph10 903 }
3524 nigel 49 if (*pt == '}')
3525     {
3526     p = pt + 1;
3527     break;
3528     }
3529 ph10 836 /* Not correct form for \x{...}; fall through */
3530 nigel 49 }
3531    
3532 ph10 842 /* \x without {} always defines just one byte in 8-bit mode. This
3533     allows UTF-8 characters to be constructed byte by byte, and also allows
3534     invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3535     Otherwise, pass it down to later code so that it can be turned into
3536 ph10 836 UTF-8 when running in 16-bit mode. */
3537 nigel 49
3538 nigel 3 c = 0;
3539     while (i++ < 2 && isxdigit(*p))
3540     {
3541 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3542 nigel 3 p++;
3543     }
3544 ph10 836 if (use_utf)
3545 ph10 842 {
3546 ph10 836 *q++ = c;
3547 ph10 842 continue;
3548     }
3549 nigel 3 break;
3550    
3551 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
3552 nigel 3 p--;
3553     continue;
3554    
3555 nigel 75 case '>':
3556 ph10 579 if (*p == '-')
3557 ph10 567 {
3558     start_offset_sign = -1;
3559     p++;
3560 ph10 579 }
3561 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3562 ph10 579 start_offset *= start_offset_sign;
3563 nigel 75 continue;
3564    
3565 nigel 3 case 'A': /* Option setting */
3566     options |= PCRE_ANCHORED;
3567     continue;
3568    
3569     case 'B':
3570     options |= PCRE_NOTBOL;
3571     continue;
3572    
3573 nigel 29 case 'C':
3574 nigel 63 if (isdigit(*p)) /* Set copy string */
3575     {
3576     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3577     copystrings |= 1 << n;
3578     }
3579     else if (isalnum(*p))
3580     {
3581 ph10 836 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3582 nigel 63 }
3583     else if (*p == '+')
3584     {
3585     callout_extra = 1;
3586     p++;
3587     }
3588     else if (*p == '-')
3589     {
3590 ph10 836 SET_PCRE_CALLOUT(NULL);
3591 nigel 63 p++;
3592     }
3593     else if (*p == '!')
3594     {
3595     callout_fail_id = 0;
3596     p++;
3597     while(isdigit(*p))
3598     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3599     callout_fail_count = 0;
3600     if (*p == '!')
3601     {
3602     p++;
3603     while(isdigit(*p))
3604     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3605     }
3606     }
3607     else if (*p == '*')
3608     {
3609     int sign = 1;
3610     callout_data = 0;
3611     if (*(++p) == '-') { sign = -1; p++; }
3612     while(isdigit(*p))
3613     callout_data = callout_data * 10 + *p++ - '0';
3614     callout_data *= sign;
3615     callout_data_set = 1;
3616     }
3617 nigel 29 continue;
3618    
3619 nigel 79 #if !defined NODFA
3620 nigel 77 case 'D':
3621 nigel 79 #if !defined NOPOSIX
3622 nigel 77 if (posix || do_posix)
3623     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3624     else
3625 nigel 79 #endif
3626 nigel 77 use_dfa = 1;
3627     continue;
3628 ph10 553 #endif
3629 nigel 77
3630 ph10 553 #if !defined NODFA
3631 nigel 77 case 'F':
3632     options |= PCRE_DFA_SHORTEST;
3633     continue;
3634 nigel 79 #endif
3635 nigel 77
3636 nigel 29 case 'G':
3637 nigel 63 if (isdigit(*p))
3638     {
3639     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3640     getstrings |= 1 << n;
3641     }
3642     else if (isalnum(*p))
3643     {
3644 ph10 836 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3645 nigel 63 }
3646 nigel 29 continue;
3647 ph10 691
3648 ph10 667 case 'J':
3649     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3650 ph10 691 if (extra != NULL
3651     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3652 ph10 667 && extra->executable_jit != NULL)
3653 ph10 691 {
3654 zherczeg 852 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3655 ph10 836 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3656     PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3657 ph10 691 }
3658 ph10 667 continue;
3659 nigel 29
3660     case 'L':
3661     getlist = 1;
3662     continue;
3663    
3664 nigel 63 case 'M':
3665     find_match_limit = 1;
3666     continue;
3667    
3668 nigel 37 case 'N':
3669 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
3670     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3671 ph10 461 else
3672 ph10 442 options |= PCRE_NOTEMPTY;
3673 nigel 37 continue;
3674    
3675 nigel 3 case 'O':
3676     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3677 nigel 53 if (n > size_offsets_max)
3678     {
3679     size_offsets_max = n;
3680 nigel 57 free(offsets);
3681 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3682 nigel 53 if (offsets == NULL)
3683     {
3684     printf("** Failed to get %d bytes of memory for offsets vector\n",
3685 ph10 151 (int)(size_offsets_max * sizeof(int)));
3686 nigel 77 yield = 1;
3687     goto EXIT;
3688 nigel 53 }
3689     }
3690     use_size_offsets = n;
3691 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3692 nigel 3 continue;
3693    
3694 nigel 75 case 'P':
3695 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3696 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3697 nigel 75 continue;
3698    
3699 nigel 91 case 'Q':
3700     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3701     if (extra == NULL)
3702     {
3703     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3704     extra->flags = 0;
3705     }
3706     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3707     extra->match_limit_recursion = n;
3708     continue;
3709    
3710     case 'q':
3711     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3712     if (extra == NULL)
3713     {
3714     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3715     extra->flags = 0;
3716     }
3717     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3718     extra->match_limit = n;
3719     continue;
3720    
3721 nigel 79 #if !defined NODFA
3722 nigel 77 case 'R':
3723     options |= PCRE_DFA_RESTART;
3724     continue;
3725 nigel 79 #endif
3726 nigel 77
3727 nigel 73 case 'S':
3728     show_malloc = 1;
3729     continue;
3730 ph10 392
3731 ph10 389 case 'Y':
3732     options |= PCRE_NO_START_OPTIMIZE;
3733 ph10 392 continue;
3734 nigel 73
3735 nigel 3 case 'Z':
3736     options |= PCRE_NOTEOL;
3737     continue;
3738 nigel 71
3739     case '?':
3740     options |= PCRE_NO_UTF8_CHECK;
3741     continue;
3742 nigel 91
3743     case '<':
3744     {
3745     int x = check_newline(p, outfile);
3746     if (x == 0) goto NEXT_DATA;
3747     options |= x;
3748     while (*p++ != '>');
3749     }
3750     continue;
3751 nigel 3 }
3752 ph10 836
3753 ph10 842 /* We now have a character value in c that may be greater than 255. In
3754     16-bit mode, we always convert characters to UTF-8 so that values greater
3755 ph10 836 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3756 ph10 842 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3757 ph10 836 mode must have come from \x{...} or octal constructs because values from
3758     \x.. get this far only in non-UTF mode. */
3759    
3760 ph10 842 #if !defined NOUTF || defined SUPPORT_PCRE16
3761 ph10 836 if (use_pcre16 || use_utf)
3762     {
3763     pcre_uint8 buff8[8];
3764     int ii, utn;
3765     utn = ord2utf8(c, buff8);
3766     for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3767     }
3768     else
3769 ph10 842 #endif
3770 ph10 836 {
3771     if (c > 255)
3772     {
3773     fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3774     "and UTF-8 mode is not enabled.\n", c);
3775     fprintf(outfile, "** Truncation will probably give the wrong "
3776     "result.\n");
3777     }
3778     *q++ = c;
3779     }
3780 nigel 3 }
3781 ph10 842
3782 ph10 836 /* Reached end of subject string */
3783 ph10 842
3784 nigel 9 *q = 0;
3785 ph10 530 len = (int)(q - dbuffer);
3786 ph10 545
3787 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
3788 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3789 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
3790 ph10 371
3791 ph10 363 #if !defined NOPOSIX
3792     if (posix || do_posix)
3793     {
3794     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3795 ph10 371 bptr += buffer_size - len - 1;
3796 ph10 363 }
3797 ph10 371 else
3798     #endif
3799 ph10 363 {
3800     memmove(bptr + buffer_size - len, bptr, len);
3801 ph10 371 bptr += buffer_size - len;
3802     }
3803 nigel 3
3804 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
3805     {
3806     printf("**Match limit not relevant for DFA matching: ignored\n");
3807     find_match_limit = 0;
3808     }
3809    
3810 nigel 3 /* Handle matching via the POSIX interface, which does not
3811 nigel 63 support timing or playing with the match limit or callout data. */
3812 nigel 3
3813 nigel 37 #if !defined NOPOSIX
3814 nigel 3 if (posix || do_posix)
3815     {
3816     int rc;
3817     int eflags = 0;
3818 nigel 63 regmatch_t *pmatch = NULL;
3819     if (use_size_offsets > 0)
3820 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3821 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3822     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3823 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3824 nigel 3
3825 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3826 nigel 3
3827     if (rc != 0)
3828     {
3829 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3830 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3831     }
3832 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3833     != 0)
3834     {
3835     fprintf(outfile, "Matched with REG_NOSUB\n");
3836     }
3837 nigel 3 else
3838     {
3839 nigel 7 size_t i;
3840 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
3841 nigel 3 {
3842     if (pmatch[i].rm_so >= 0)
3843     {
3844 nigel 23 fprintf(outfile, "%2d: ", (int)i);
3845 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_so,
3846 nigel 63 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3847 nigel 3 fprintf(outfile, "\n");
3848 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3849 nigel 35 {
3850 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
3851 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3852 nigel 63 outfile);
3853 nigel 35 fprintf(outfile, "\n");
3854     }
3855 nigel 3 }
3856     }
3857     }
3858 nigel 53 free(pmatch);
3859 ph10 836 goto NEXT_DATA;
3860 nigel 3 }
3861    
3862 ph10 836 #endif /* !defined NOPOSIX */
3863    
3864 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
3865 nigel 3
3866 ph10 836 #ifdef SUPPORT_PCRE16
3867     if (use_pcre16)
3868     {
3869 zherczeg 852 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3870 ph10 836 switch(len)
3871     {
3872     case -1:
3873     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3874     "converted to UTF-16\n");
3875     goto NEXT_DATA;
3876 nigel 37