/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 893 - (hide annotations) (download)
Thu Jan 19 17:15:11 2012 UTC (16 months ago) by ph10
File MIME type: text/plain
File size: 130426 byte(s)
Experimental stack size determination.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 836 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39 ph10 836 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40     libraries in a single program. This is different from the modules such as
41     pcre_compile.c in the library itself, which are compiled separately for each
42     mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43     (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44     compiled only once. Therefore, it must not make use of any of the macros from
45     pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46     however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47     only supported library functions. */
48 nigel 75
49 ph10 836
50 ph10 200 #ifdef HAVE_CONFIG_H
51 ph10 236 #include "config.h"
52 ph10 200 #endif
53 ph10 199
54 nigel 3 #include <ctype.h>
55     #include <stdio.h>
56     #include <string.h>
57     #include <stdlib.h>
58     #include <time.h>
59 nigel 25 #include <locale.h>
60 nigel 75 #include <errno.h>
61 nigel 3
62 ph10 287 #ifdef SUPPORT_LIBREADLINE
63 ph10 343 #ifdef HAVE_UNISTD_H
64 ph10 287 #include <unistd.h>
65 ph10 343 #endif
66 ph10 287 #include <readline/readline.h>
67     #include <readline/history.h>
68     #endif
69 nigel 93
70 ph10 287
71 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
72     input and output without "b"; then I was told that "b" was needed in some
73     environments, so it was added for release 5.0 to both the input and output. (It
74     makes no difference on Unix-like systems.) Later I was told that it is wrong
75     for the input on Windows. I've now abstracted the modes into two macros that
76     are set here, to make it easier to fiddle with them, and removed "b" from the
77     input mode under Windows. */
78    
79     #if defined(_WIN32) || defined(WIN32)
80     #include <io.h> /* For _setmode() */
81     #include <fcntl.h> /* For _O_BINARY */
82     #define INPUT_MODE "r"
83     #define OUTPUT_MODE "wb"
84    
85 ph10 411 #ifndef isatty
86     #define isatty _isatty /* This is what Windows calls them, I'm told, */
87     #endif /* though in some environments they seem to */
88     /* be already defined, hence the #ifndefs. */
89     #ifndef fileno
90 ph10 343 #define fileno _fileno
91 ph10 411 #endif
92 ph10 343
93 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95     #ifdef __BORLANDC__
96     #define _setmode(handle, mode) setmode(handle, mode)
97     #endif
98    
99     /* Not Windows */
100    
101 nigel 93 #else
102     #include <sys/time.h> /* These two includes are needed */
103     #include <sys/resource.h> /* for setrlimit(). */
104     #define INPUT_MODE "rb"
105     #define OUTPUT_MODE "wb"
106 nigel 91 #endif
107    
108 nigel 93
109 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
110     displaying the results of pcre_study() and we also need to know about the
111     internal macros, structures, and other internal data values; pcretest has
112     "inside information" compared to a program that strictly follows the PCRE API.
113 nigel 37
114 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116     appropriately for an application, not for building PCRE. */
117 nigel 77
118 ph10 145 #include "pcre.h"
119 ph10 836
120     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121     /* Configure internal macros to 16 bit mode. */
122     #define COMPILE_PCRE16
123     #endif
124    
125 nigel 77 #include "pcre_internal.h"
126    
127 ph10 836 /* The pcre_printint() function, which prints the internal form of a compiled
128     regex, is held in a separate file so that (a) it can be compiled in either
129     8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130     when that is compiled in debug mode. */
131    
132     #ifdef SUPPORT_PCRE8
133     void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134     #endif
135     #ifdef SUPPORT_PCRE16
136     void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137     #endif
138    
139 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
140     to keep two copies, we include the source file here, changing the names of the
141     external symbols to prevent clashes. */
142 nigel 77
143 ph10 836 #define PCRE_INCLUDED
144     #undef PRIV
145     #define PRIV(name) name
146 nigel 85
147     #include "pcre_tables.c"
148    
149 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
150 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
151 ph10 836 the same as in the printint.src file. We use it here in cases when the locale
152     has not been explicitly changed, so as to get consistent output from systems
153     that differ in their output from isprint() even in the "C" locale. */
154 nigel 93
155 ph10 836 #ifdef EBCDIC
156     #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157     #else
158     #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159     #endif
160 nigel 85
161 ph10 836 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163     /* Posix support is disabled in 16 bit only mode. */
164     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165     #define NOPOSIX
166     #endif
167    
168 nigel 37 /* It is possible to compile this test program without including support for
169     testing the POSIX interface, though this is not available via the standard
170     Makefile. */
171    
172     #if !defined NOPOSIX
173 nigel 3 #include "pcreposix.h"
174 nigel 37 #endif
175 nigel 3
176 ph10 836 /* It is also possible, originally for the benefit of a version that was
177     imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178     NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
179     automatically cut out the UTF support if PCRE is built without it. */
180 nigel 79
181 ph10 836 #ifndef SUPPORT_UTF
182     #ifndef NOUTF
183     #define NOUTF
184 ph10 107 #endif
185     #endif
186 nigel 79
187 ph10 836 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188     for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189     only from one place and is handled differently). I couldn't dream up any way of
190     using a single macro to do this in a generic way, because of the many different
191     argument requirements. We know that at least one of SUPPORT_PCRE8 and
192     SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193     use these in the definitions of generic macros.
194 ph10 107
195 ph10 836 **** Special note about the PCHARSxxx macros: the address of the string to be
196     printed is always given as two arguments: a base address followed by an offset.
197     The base address is cast to the correct data size for 8 or 16 bit data; the
198     offset is in units of this size. If the string were given as base+offset in one
199     argument, the casting might be incorrectly applied. */
200    
201     #ifdef SUPPORT_PCRE8
202    
203     #define PCHARS8(lv, p, offset, len, f) \
204     lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206     #define PCHARSV8(p, offset, len, f) \
207     (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209     #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210     p = read_capture_name8(p, cn8, re)
211    
212 zherczeg 852 #define STRLEN8(p) ((int)strlen((char *)p))
213    
214 ph10 836 #define SET_PCRE_CALLOUT8(callout) \
215     pcre_callout = callout
216    
217 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
218     pcre_assign_jit_stack(extra, callback, userdata)
219 ph10 836
220     #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
221     re = pcre_compile((char *)pat, options, error, erroffset, tables)
222    
223     #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
224     namesptr, cbuffer, size) \
225     rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
226     (char *)namesptr, cbuffer, size)
227    
228     #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
229     rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
230    
231     #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
232     offsets, size_offsets, workspace, size_workspace) \
233     count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
234     offsets, size_offsets, workspace, size_workspace)
235    
236     #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
237     offsets, size_offsets) \
238     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
239     offsets, size_offsets)
240    
241     #define PCRE_FREE_STUDY8(extra) \
242     pcre_free_study(extra)
243    
244     #define PCRE_FREE_SUBSTRING8(substring) \
245     pcre_free_substring(substring)
246    
247     #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
248     pcre_free_substring_list(listptr)
249    
250     #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251     getnamesptr, subsptr) \
252     rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
253     (char *)getnamesptr, subsptr)
254    
/* Assign to n the result of pcre_get_stringnumber() for the name at ptr in the
compiled 8-bit pattern re. The second parameter is the pattern itself: it used
to be declared as "rc" and was never referenced, so the expansion silently
picked up whichever variable named "re" happened to be in scope at the call
site. Naming the parameter "re" makes the macro use the argument it is given;
callers that already pass a variable called "re" see no change. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber((re), (char *)(ptr))
257    
258     #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
259     rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
260    
261     #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
262     rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
263    
264 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
265     rc = pcre_pattern_to_host_byte_order(re, extra, tables)
266 ph10 836
267     #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
268     pcre_printint(re, outfile, debug_lengths)
269    
270     #define PCRE_STUDY8(extra, re, options, error) \
271     extra = pcre_study(re, options, error)
272    
273 zherczeg 852 #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
274     pcre_jit_stack_alloc(startsize, maxsize)
275    
276     #define PCRE_JIT_STACK_FREE8(stack) \
277     pcre_jit_stack_free(stack)
278    
279 ph10 836 #endif /* SUPPORT_PCRE8 */
280    
281     /* -----------------------------------------------------------*/
282    
283     #ifdef SUPPORT_PCRE16
284    
285     #define PCHARS16(lv, p, offset, len, f) \
286     lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
287    
288     #define PCHARSV16(p, offset, len, f) \
289     (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
290    
291     #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
292     p = read_capture_name16(p, cn16, re)
293    
294     #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
295    
296     #define SET_PCRE_CALLOUT16(callout) \
297 zherczeg 850 pcre16_callout = (int (*)(pcre16_callout_block *))callout
298 ph10 836
299 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
300     pcre16_assign_jit_stack((pcre16_extra *)extra, \
301     (pcre16_jit_callback)callback, userdata)
302 ph10 836
303     #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
304 zherczeg 852 re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
305     tables)
306 ph10 836
307     #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
308     namesptr, cbuffer, size) \
309 zherczeg 852 rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
310 zherczeg 860 count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
311 ph10 836
312     #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
313     rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
314 zherczeg 860 (PCRE_UCHAR16 *)cbuffer, size/2)
315 ph10 836
316     #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
317     offsets, size_offsets, workspace, size_workspace) \
318 zherczeg 852 count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
319     (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
320     workspace, size_workspace)
321 ph10 836
322     #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
323     offsets, size_offsets) \
324 zherczeg 852 count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
325     len, start_offset, options, offsets, size_offsets)
326 ph10 836
327     #define PCRE_FREE_STUDY16(extra) \
328 zherczeg 850 pcre16_free_study((pcre16_extra *)extra)
329 ph10 836
330     #define PCRE_FREE_SUBSTRING16(substring) \
331     pcre16_free_substring((PCRE_SPTR16)substring)
332    
333     #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
334     pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
335    
336     #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
337     getnamesptr, subsptr) \
338 zherczeg 852 rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
339     count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
340 ph10 836
/* Assign to n the result of pcre16_get_stringnumber() for the 16-bit name at
ptr in the compiled pattern re. The second parameter is the pattern itself: it
used to be declared as "rc" and was never referenced, so the expansion relied
on a variable named "re" being in scope at the call site. The pattern is also
cast to (pcre16 *) here, in the same way as the other 16-bit wrappers in this
group, because the caller holds it as a (pcre *). */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber((pcre16 *)(re), (PCRE_SPTR16)(ptr))
343    
344     #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
345     rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
346     (PCRE_SPTR16 *)(void*)subsptr)
347    
348     #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
349     rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
350     (PCRE_SPTR16 **)(void*)listptr)
351    
352 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
353 zherczeg 852 rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
354     tables)
355 ph10 836
356     #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
357     pcre16_printint(re, outfile, debug_lengths)
358    
359     #define PCRE_STUDY16(extra, re, options, error) \
360 zherczeg 852 extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
361 ph10 836
362 zherczeg 852 #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
363     (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
364    
365     #define PCRE_JIT_STACK_FREE16(stack) \
366     pcre16_jit_stack_free((pcre16_jit_stack *)stack)
367    
368 ph10 836 #endif /* SUPPORT_PCRE16 */
369    
370    
371     /* ----- Both modes are supported; a runtime test is needed, except for
372     pcre_config(), where it doesn't matter which version is called. (The JIT
373     stack macros below do select the function at runtime.) ----- */
374    
375     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376    
377     #define CHAR_SIZE (use_pcre16? 2:1)
378    
379     #define PCHARS(lv, p, offset, len, f) \
380     if (use_pcre16) \
381     PCHARS16(lv, p, offset, len, f); \
382     else \
383     PCHARS8(lv, p, offset, len, f)
384    
385     #define PCHARSV(p, offset, len, f) \
386     if (use_pcre16) \
387     PCHARSV16(p, offset, len, f); \
388     else \
389     PCHARSV8(p, offset, len, f)
390    
391     #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
392     if (use_pcre16) \
393     READ_CAPTURE_NAME16(p, cn8, cn16, re); \
394     else \
395     READ_CAPTURE_NAME8(p, cn8, cn16, re)
396    
397     #define SET_PCRE_CALLOUT(callout) \
398     if (use_pcre16) \
399     SET_PCRE_CALLOUT16(callout); \
400     else \
401     SET_PCRE_CALLOUT8(callout)
402    
403     #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
404    
405 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
406     if (use_pcre16) \
407     PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
408     else \
409     PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
410 ph10 836
411     #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
412     if (use_pcre16) \
413     PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
414     else \
415     PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
416    
417     #define PCRE_CONFIG pcre_config
418    
419     #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
420     namesptr, cbuffer, size) \
421     if (use_pcre16) \
422     PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
423     namesptr, cbuffer, size); \
424     else \
425     PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
426     namesptr, cbuffer, size)
427    
428     #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
429     if (use_pcre16) \
430     PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
431     else \
432     PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
433    
434     #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
435     offsets, size_offsets, workspace, size_workspace) \
436     if (use_pcre16) \
437     PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
438     offsets, size_offsets, workspace, size_workspace); \
439     else \
440     PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
441     offsets, size_offsets, workspace, size_workspace)
442    
443     #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
444     offsets, size_offsets) \
445     if (use_pcre16) \
446     PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
447     offsets, size_offsets); \
448     else \
449     PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
450     offsets, size_offsets)
451    
452     #define PCRE_FREE_STUDY(extra) \
453     if (use_pcre16) \
454     PCRE_FREE_STUDY16(extra); \
455     else \
456     PCRE_FREE_STUDY8(extra)
457    
458     #define PCRE_FREE_SUBSTRING(substring) \
459     if (use_pcre16) \
460     PCRE_FREE_SUBSTRING16(substring); \
461     else \
462     PCRE_FREE_SUBSTRING8(substring)
463    
464     #define PCRE_FREE_SUBSTRING_LIST(listptr) \
465     if (use_pcre16) \
466     PCRE_FREE_SUBSTRING_LIST16(listptr); \
467     else \
468     PCRE_FREE_SUBSTRING_LIST8(listptr)
469    
470     #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
471     getnamesptr, subsptr) \
472     if (use_pcre16) \
473     PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
474     getnamesptr, subsptr); \
475     else \
476     PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
477     getnamesptr, subsptr)
478    
479     #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
480     if (use_pcre16) \
481     PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
482     else \
483     PCRE_GET_STRINGNUMBER8(n, rc, ptr)
484    
485     #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
486     if (use_pcre16) \
487     PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
488     else \
489     PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
490    
491     #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
492     if (use_pcre16) \
493     PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
494     else \
495     PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
496    
497 zherczeg 852 #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
498     (use_pcre16 ? \
499     PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
500     :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
501 ph10 836
502 zherczeg 852 #define PCRE_JIT_STACK_FREE(stack) \
503     if (use_pcre16) \
504     PCRE_JIT_STACK_FREE16(stack); \
505     else \
506     PCRE_JIT_STACK_FREE8(stack)
507    
508 ph10 836 #define PCRE_MAKETABLES \
509     (use_pcre16? pcre16_maketables() : pcre_maketables())
510    
511 zherczeg 839 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
512 ph10 836 if (use_pcre16) \
513 zherczeg 839 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
514 ph10 836 else \
515 zherczeg 839 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
516 ph10 836
517     #define PCRE_PRINTINT(re, outfile, debug_lengths) \
518     if (use_pcre16) \
519     PCRE_PRINTINT16(re, outfile, debug_lengths); \
520     else \
521     PCRE_PRINTINT8(re, outfile, debug_lengths)
522    
523     #define PCRE_STUDY(extra, re, options, error) \
524     if (use_pcre16) \
525     PCRE_STUDY16(extra, re, options, error); \
526     else \
527     PCRE_STUDY8(extra, re, options, error)
528    
529     /* ----- Only 8-bit mode is supported ----- */
530    
531     #elif defined SUPPORT_PCRE8
532     #define CHAR_SIZE 1
533     #define PCHARS PCHARS8
534     #define PCHARSV PCHARSV8
535     #define READ_CAPTURE_NAME READ_CAPTURE_NAME8
536     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
537     #define STRLEN STRLEN8
538 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK8
539 ph10 836 #define PCRE_COMPILE PCRE_COMPILE8
540     #define PCRE_CONFIG pcre_config
541     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
542     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
543     #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
544     #define PCRE_EXEC PCRE_EXEC8
545     #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
546     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
547     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
548     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
549     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
550     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
551     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
552 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC8
553     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE8
554 ph10 836 #define PCRE_MAKETABLES pcre_maketables()
555     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
556     #define PCRE_PRINTINT PCRE_PRINTINT8
557     #define PCRE_STUDY PCRE_STUDY8
558    
559     /* ----- Only 16-bit mode is supported ----- */
560    
561     #else
562     #define CHAR_SIZE 2
563     #define PCHARS PCHARS16
564     #define PCHARSV PCHARSV16
565     #define READ_CAPTURE_NAME READ_CAPTURE_NAME16
566     #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
567     #define STRLEN STRLEN16
568 zherczeg 852 #define PCRE_ASSIGN_JIT_STACK PCRE_ASSIGN_JIT_STACK16
569 ph10 836 #define PCRE_COMPILE PCRE_COMPILE16
570     #define PCRE_CONFIG pcre16_config
571     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
572     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
573     #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
574     #define PCRE_EXEC PCRE_EXEC16
575     #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
576     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
577     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
578     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
579     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
580     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
581     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
582 zherczeg 852 #define PCRE_JIT_STACK_ALLOC PCRE_JIT_STACK_ALLOC16
583     #define PCRE_JIT_STACK_FREE PCRE_JIT_STACK_FREE16
584 ph10 836 #define PCRE_MAKETABLES pcre16_maketables()
585     #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
586     #define PCRE_PRINTINT PCRE_PRINTINT16
587     #define PCRE_STUDY PCRE_STUDY16
588     #endif
589    
590     /* ----- End of mode-specific function call macros ----- */
591    
592    
593 nigel 85 /* Other parameters */
594    
595 nigel 3 #ifndef CLOCKS_PER_SEC
596     #ifdef CLK_TCK
597     #define CLOCKS_PER_SEC CLK_TCK
598     #else
599     #define CLOCKS_PER_SEC 100
600     #endif
601     #endif
602    
603 nigel 93 /* This is the default loop count for timing. */
604    
605 nigel 75 #define LOOPREPEAT 500000
606 nigel 3
607 nigel 85 /* Static variables */
608    
609 nigel 3 static FILE *outfile;
610     static int log_store = 0;
611 nigel 63 static int callout_count;
612     static int callout_extra;
613     static int callout_fail_count;
614     static int callout_fail_id;
615 ph10 210 static int debug_lengths;
616 nigel 63 static int first_callout;
617 nigel 93 static int locale_set = 0;
618 nigel 73 static int show_malloc;
619 ph10 836 static int use_utf;
620 nigel 43 static size_t gotten_store;
621 ph10 836 static size_t first_gotten_store = 0;
622 ph10 645 static const unsigned char *last_callout_mark = NULL;
623 nigel 3
624 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
625    
626     static int buffer_size = 50000;
627 ph10 836 static pcre_uint8 *buffer = NULL;
628     static pcre_uint8 *dbuffer = NULL;
629     static pcre_uint8 *pbuffer = NULL;
630 nigel 3
631 ph10 836 /* Another buffer is needed for translation to 16-bit character strings. It will
632     be obtained and extended as required. */
633    
634     #ifdef SUPPORT_PCRE16
635     static int buffer16_size = 0;
636     static pcre_uint16 *buffer16 = NULL;
637    
638     #ifdef SUPPORT_PCRE8
639    
640     /* We need the table of operator lengths that is used for 16-bit compiling, in
641     order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642     data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643     appropriately for the 16-bit world. Just as a safety check, make sure that
644     COMPILE_PCRE16 is *not* set. */
645    
646     #ifdef COMPILE_PCRE16
647     #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648     #endif
649    
650     #if LINK_SIZE == 2
651     #undef LINK_SIZE
652     #define LINK_SIZE 1
653     #elif LINK_SIZE == 3 || LINK_SIZE == 4
654     #undef LINK_SIZE
655     #define LINK_SIZE 2
656     #else
657     #error LINK_SIZE must be either 2, 3, or 4
658     #endif
659    
660 zherczeg 839 #undef IMM2_SIZE
661     #define IMM2_SIZE 1
662    
663 ph10 836 #endif /* SUPPORT_PCRE8 */
664    
665     static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666     #endif /* SUPPORT_PCRE16 */
667    
668     /* If we have 8-bit support, default use_pcre16 to false; if there is also
669     16-bit support, it can be changed by an option. If there is no 8-bit support,
670     there must be 16-bit support, so default it to 1. */
671    
672     #ifdef SUPPORT_PCRE8
673     static int use_pcre16 = 0;
674     #else
675     static int use_pcre16 = 1;
676     #endif
677    
/* Textual explanations for runtime error codes.

A NULL entry means there is no generic text for that code; the comment beside
each NULL says why (the code is reported by dedicated logic elsewhere, or is
never returned by the matching functions).

NOTE(review): the table appears to be indexed by the negated error code (entry
0 is "no error", entry 1 is NOMATCH, and so on) - confirm against the
PCRE_ERROR_xxx values in pcre.h before inserting or reordering entries.

Both the strings and the pointer slots are const: this is a read-only lookup
table. */

static const char * const errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,  /* BADUTF8/16 is handled specially */
  NULL,  /* BADUTF8/16 offset is handled specially */
  NULL,  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,  /* SHORTUTF8/16 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode: 8-bit/16-bit error"
};
711    
712 ph10 654
713 ph10 541 /*************************************************
714     * Alternate character tables *
715     *************************************************/
716 nigel 49
717 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
718     using the default tables of the library. However, the T option can be used to
719     select alternate sets of tables, for different kinds of testing. Note also that
720 ph10 541 the L (locale) option also adjusts the tables. */
721    
722 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
723 ph10 541 only ASCII characters. */
724    
725 ph10 836 static const pcre_uint8 tables0[] = {
726 ph10 541 /* Layout, 1088 bytes in all: a 256-byte lower casing table, a 256-byte case flipping table, ten 32-byte character class bit maps (320 bytes), and a 256-byte character type table. */
727     /* This table is a lower casing table: 'A'-'Z' (65-90) map to 'a'-'z' (97-122) and every other value maps to itself. */
728    
729     0, 1, 2, 3, 4, 5, 6, 7,
730     8, 9, 10, 11, 12, 13, 14, 15,
731     16, 17, 18, 19, 20, 21, 22, 23,
732     24, 25, 26, 27, 28, 29, 30, 31,
733     32, 33, 34, 35, 36, 37, 38, 39,
734     40, 41, 42, 43, 44, 45, 46, 47,
735     48, 49, 50, 51, 52, 53, 54, 55,
736     56, 57, 58, 59, 60, 61, 62, 63,
737     64, 97, 98, 99,100,101,102,103,
738     104,105,106,107,108,109,110,111,
739     112,113,114,115,116,117,118,119,
740     120,121,122, 91, 92, 93, 94, 95,
741     96, 97, 98, 99,100,101,102,103,
742     104,105,106,107,108,109,110,111,
743     112,113,114,115,116,117,118,119,
744     120,121,122,123,124,125,126,127,
745     128,129,130,131,132,133,134,135,
746     136,137,138,139,140,141,142,143,
747     144,145,146,147,148,149,150,151,
748     152,153,154,155,156,157,158,159,
749     160,161,162,163,164,165,166,167,
750     168,169,170,171,172,173,174,175,
751     176,177,178,179,180,181,182,183,
752     184,185,186,187,188,189,190,191,
753     192,193,194,195,196,197,198,199,
754     200,201,202,203,204,205,206,207,
755     208,209,210,211,212,213,214,215,
756     216,217,218,219,220,221,222,223,
757     224,225,226,227,228,229,230,231,
758     232,233,234,235,236,237,238,239,
759     240,241,242,243,244,245,246,247,
760     248,249,250,251,252,253,254,255,
761    
762     /* This table is a case flipping table: 'A'-'Z' map to 'a'-'z', 'a'-'z' map to 'A'-'Z', and every other value maps to itself. */
763    
764     0, 1, 2, 3, 4, 5, 6, 7,
765     8, 9, 10, 11, 12, 13, 14, 15,
766     16, 17, 18, 19, 20, 21, 22, 23,
767     24, 25, 26, 27, 28, 29, 30, 31,
768     32, 33, 34, 35, 36, 37, 38, 39,
769     40, 41, 42, 43, 44, 45, 46, 47,
770     48, 49, 50, 51, 52, 53, 54, 55,
771     56, 57, 58, 59, 60, 61, 62, 63,
772     64, 97, 98, 99,100,101,102,103,
773     104,105,106,107,108,109,110,111,
774     112,113,114,115,116,117,118,119,
775     120,121,122, 91, 92, 93, 94, 95,
776     96, 65, 66, 67, 68, 69, 70, 71,
777     72, 73, 74, 75, 76, 77, 78, 79,
778     80, 81, 82, 83, 84, 85, 86, 87,
779     88, 89, 90,123,124,125,126,127,
780     128,129,130,131,132,133,134,135,
781     136,137,138,139,140,141,142,143,
782     144,145,146,147,148,149,150,151,
783     152,153,154,155,156,157,158,159,
784     160,161,162,163,164,165,166,167,
785     168,169,170,171,172,173,174,175,
786     176,177,178,179,180,181,182,183,
787     184,185,186,187,188,189,190,191,
788     192,193,194,195,196,197,198,199,
789     200,201,202,203,204,205,206,207,
790     208,209,210,211,212,213,214,215,
791     216,217,218,219,220,221,222,223,
792     224,225,226,227,228,229,230,231,
793     232,233,234,235,236,237,238,239,
794     240,241,242,243,244,245,246,247,
795     248,249,250,251,252,253,254,255,
796    
797     /* This table contains bit maps for various character classes. Each map is 32
798     bytes long and the bits run from the least significant end of each byte. The
799     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
800     graph, print, punct, and cntrl. Other classes are built from combinations.
Bit (n & 7) of byte (n >> 3) within a map is set when character n belongs to
the class; the maps follow in the order listed above. */
801     /* space */
802     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
803     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
805     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
806     /* xdigit */
807     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
808     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
809     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
810     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
811     /* digit */
812     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
813     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
815     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
816     /* upper */
817     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
819     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
820     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
821     /* lower */
822     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
824     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
825     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
826     /* word */
827     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
828     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
829     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
830     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
831     /* graph */
832     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
833     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
834     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836     /* print */
837     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
838     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
839     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
840     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841     /* punct */
842     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
843     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
844     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846     /* cntrl */
847     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
848     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
849     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
850     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851    
852     /* This table identifies various classes of character by individual bits:
853     0x01 white space character
854     0x02 letter
855     0x04 decimal digit
856     0x08 hexadecimal digit
857     0x10 alphanumeric or '_'
858     0x80 regular expression metacharacter or binary zero
The table has one byte per character value, 256 bytes in all. In this ASCII-only
set every entry for the values 128-255 is zero.
859     */
860    
861     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
862     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
863     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
864     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
865     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
866     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
867     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
868     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
869     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
870     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
871     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
872     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
873     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
874     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
875     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
876     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
877     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
878     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
879     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
880     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
881     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
882     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
883     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
884     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
885     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
886     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
887     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
888     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
889     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
890     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
891     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
892     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
893    
894 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
895     be at least an approximation of ISO 8859. In particular, there are characters
896 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
897    
898 ph10 836 static const pcre_uint8 tables1[] = { /* NOTE(review): the section markers below are inferred from the entry counts (256 + 256 + 320 + 256) matching tables0 - confirm */
899 ph10 541 0,1,2,3,4,5,6,7, /* start of the 256-entry lower casing table; besides 'A'-'Z', the values 192-222 (except 215) map to 224-254 */
900     8,9,10,11,12,13,14,15,
901     16,17,18,19,20,21,22,23,
902     24,25,26,27,28,29,30,31,
903     32,33,34,35,36,37,38,39,
904     40,41,42,43,44,45,46,47,
905     48,49,50,51,52,53,54,55,
906     56,57,58,59,60,61,62,63,
907     64,97,98,99,100,101,102,103,
908     104,105,106,107,108,109,110,111,
909     112,113,114,115,116,117,118,119,
910     120,121,122,91,92,93,94,95,
911     96,97,98,99,100,101,102,103,
912     104,105,106,107,108,109,110,111,
913     112,113,114,115,116,117,118,119,
914     120,121,122,123,124,125,126,127,
915     128,129,130,131,132,133,134,135,
916     136,137,138,139,140,141,142,143,
917     144,145,146,147,148,149,150,151,
918     152,153,154,155,156,157,158,159,
919     160,161,162,163,164,165,166,167,
920     168,169,170,171,172,173,174,175,
921     176,177,178,179,180,181,182,183,
922     184,185,186,187,188,189,190,191,
923     224,225,226,227,228,229,230,231,
924     232,233,234,235,236,237,238,239,
925     240,241,242,243,244,245,246,215,
926     248,249,250,251,252,253,254,223,
927     224,225,226,227,228,229,230,231,
928     232,233,234,235,236,237,238,239,
929     240,241,242,243,244,245,246,247,
930     248,249,250,251,252,253,254,255,
931     0,1,2,3,4,5,6,7, /* start of the 256-entry case flipping table; 192-222 (except 215) and 224-254 (except 247) swap with each other */
932     8,9,10,11,12,13,14,15,
933     16,17,18,19,20,21,22,23,
934     24,25,26,27,28,29,30,31,
935     32,33,34,35,36,37,38,39,
936     40,41,42,43,44,45,46,47,
937     48,49,50,51,52,53,54,55,
938     56,57,58,59,60,61,62,63,
939     64,97,98,99,100,101,102,103,
940     104,105,106,107,108,109,110,111,
941     112,113,114,115,116,117,118,119,
942     120,121,122,91,92,93,94,95,
943     96,65,66,67,68,69,70,71,
944     72,73,74,75,76,77,78,79,
945     80,81,82,83,84,85,86,87,
946     88,89,90,123,124,125,126,127,
947     128,129,130,131,132,133,134,135,
948     136,137,138,139,140,141,142,143,
949     144,145,146,147,148,149,150,151,
950     152,153,154,155,156,157,158,159,
951     160,161,162,163,164,165,166,167,
952     168,169,170,171,172,173,174,175,
953     176,177,178,179,180,181,182,183,
954     184,185,186,187,188,189,190,191,
955     224,225,226,227,228,229,230,231,
956     232,233,234,235,236,237,238,239,
957     240,241,242,243,244,245,246,215,
958     248,249,250,251,252,253,254,223,
959     192,193,194,195,196,197,198,199,
960     200,201,202,203,204,205,206,207,
961     208,209,210,211,212,213,214,247,
962     216,217,218,219,220,221,222,255,
963     0,62,0,0,1,0,0,0, /* start of the 320 bytes of character class bit maps: ten maps of 32 bytes (four rows) each */
964     0,0,0,0,0,0,0,0,
965     32,0,0,0,1,0,0,0,
966     0,0,0,0,0,0,0,0,
967     0,0,0,0,0,0,255,3,
968     126,0,0,0,126,0,0,0,
969     0,0,0,0,0,0,0,0,
970     0,0,0,0,0,0,0,0,
971     0,0,0,0,0,0,255,3,
972     0,0,0,0,0,0,0,0,
973     0,0,0,0,0,0,12,2,
974     0,0,0,0,0,0,0,0,
975     0,0,0,0,0,0,0,0,
976     254,255,255,7,0,0,0,0,
977     0,0,0,0,0,0,0,0,
978     255,255,127,127,0,0,0,0,
979     0,0,0,0,0,0,0,0,
980     0,0,0,0,254,255,255,7,
981     0,0,0,0,0,4,32,4,
982     0,0,0,128,255,255,127,255,
983     0,0,0,0,0,0,255,3,
984     254,255,255,135,254,255,255,7,
985     0,0,0,0,0,4,44,6,
986     255,255,127,255,255,255,127,255,
987     0,0,0,0,254,255,255,255,
988     255,255,255,255,255,255,255,127,
989     0,0,0,0,254,255,255,255,
990     255,255,255,255,255,255,255,255,
991     0,2,0,0,255,255,255,255,
992     255,255,255,255,255,255,255,127,
993     0,0,0,0,255,255,255,255,
994     255,255,255,255,255,255,255,255,
995     0,0,0,0,254,255,0,252,
996     1,0,0,248,1,0,0,120,
997     0,0,0,0,254,255,255,255,
998     0,0,128,0,0,0,128,0,
999     255,255,255,255,0,0,0,0,
1000     0,0,0,0,0,0,0,128,
1001     255,255,255,255,0,0,0,0,
1002     0,0,0,0,0,0,0,0,
1003     128,0,0,0,0,0,0,0, /* start of the 256-entry character type table; unlike tables0, entries for values above 127 are not all zero */
1004     0,1,1,0,1,1,0,0,
1005     0,0,0,0,0,0,0,0,
1006     0,0,0,0,0,0,0,0,
1007     1,0,0,0,128,0,0,0,
1008     128,128,128,128,0,0,128,0,
1009     28,28,28,28,28,28,28,28,
1010     28,28,0,0,0,0,0,128,
1011     0,26,26,26,26,26,26,18,
1012     18,18,18,18,18,18,18,18,
1013     18,18,18,18,18,18,18,18,
1014     18,18,18,128,128,0,128,16,
1015     0,26,26,26,26,26,26,18,
1016     18,18,18,18,18,18,18,18,
1017     18,18,18,18,18,18,18,18,
1018     18,18,18,128,128,0,0,0,
1019     0,0,0,0,0,1,0,0,
1020     0,0,0,0,0,0,0,0,
1021     0,0,0,0,0,0,0,0,
1022     0,0,0,0,0,0,0,0,
1023     1,0,0,0,0,0,0,0,
1024     0,0,18,0,0,0,0,0,
1025     0,0,20,20,0,18,0,0,
1026     0,20,18,0,0,0,0,0,
1027     18,18,18,18,18,18,18,18,
1028     18,18,18,18,18,18,18,18,
1029     18,18,18,18,18,18,18,0,
1030     18,18,18,18,18,18,18,18,
1031     18,18,18,18,18,18,18,18,
1032     18,18,18,18,18,18,18,18,
1033     18,18,18,18,18,18,18,0,
1034     18,18,18,18,18,18,18,18
1035     };
1036    
1037    
1038    
1039 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback for C libraries that lack strerror() (SunOS4 is one such system).
The message is fetched from the traditional sys_errlist[] table after the
error number has been range-checked against sys_nerr; a number outside that
range yields a fixed string instead. */

extern int sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
#endif /* HAVE_STRERROR */
1059    
1060    
1061 ph10 667 /*************************************************
1062     * JIT memory callback *
1063     *************************************************/
1064 ph10 558
1065 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
1066     {
1067     return (pcre_jit_stack *)arg;
1068     }
1069 ph10 558
1070 ph10 667
1071 ph10 836 #if !defined NOUTF || defined SUPPORT_PCRE16
1072 ph10 558 /*************************************************
1073 ph10 836 * Convert UTF-8 string to value *
1074     *************************************************/
1075    
1076     /* This function takes one or more bytes that represents a UTF-8 character,
1077     and returns the value of the character.
1078    
1079     Argument:
1080     utf8bytes a pointer to the byte vector
1081     vptr a pointer to an int to receive the value
1082    
1083     Returns: > 0 => the number of bytes consumed
1084     -6 to 0 => malformed UTF-8 character at offset = (-return)
1085     */
1086    
1087     static int
1088     utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089     {
1090     int c = *utf8bytes++;
1091     int d = c;
1092     int i, j, s;
1093    
1094     for (i = -1; i < 6; i++) /* i is number of additional bytes */
1095     {
1096     if ((d & 0x80) == 0) break;
1097     d <<= 1;
1098     }
1099    
1100     if (i == -1) { *vptr = c; return 1; } /* ascii character */
1101     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1102    
1103     /* i now has a value in the range 1-5 */
1104    
1105     s = 6*i;
1106     d = (c & utf8_table3[i]) << s;
1107    
1108     for (j = 0; j < i; j++)
1109     {
1110     c = *utf8bytes++;
1111     if ((c & 0xc0) != 0x80) return -(j+1);
1112     s -= 6;
1113     d |= (c & 0x3f) << s;
1114     }
1115    
1116     /* Check that encoding was the correct unique one */
1117    
1118     for (j = 0; j < utf8_table1_size; j++)
1119     if (d <= utf8_table1[j]) break;
1120     if (j != i) return -(i+1);
1121    
1122     /* Valid value */
1123    
1124     *vptr = d;
1125     return i+1;
1126     }
1127     #endif /* NOUTF || SUPPORT_PCRE16 */
1128    
1129    
1130    
1131     #if !defined NOUTF || defined SUPPORT_PCRE16
1132     /*************************************************
1133     * Convert character value to UTF-8 *
1134     *************************************************/
1135    
1136     /* This function takes an integer value in the range 0 - 0x7fffffff
1137     and encodes it as a UTF-8 character in 0 to 6 bytes.
1138    
1139     Arguments:
1140     cvalue the character value
1141     utf8bytes pointer to buffer for result - at least 6 bytes long
1142    
1143     Returns: number of characters placed in the buffer
1144     */
1145    
1146     static int
1147     ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148     {
1149     register int i, j;
1150     for (i = 0; i < utf8_table1_size; i++)
1151     if (cvalue <= utf8_table1[i]) break;
1152     utf8bytes += i;
1153     for (j = i; j > 0; j--)
1154     {
1155     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156     cvalue >>= 6;
1157     }
1158     *utf8bytes = utf8_table2[i] | cvalue;
1159     return i + 1;
1160     }
1161 ph10 842 #endif
1162 ph10 836
1163    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is reallocated here whenever
it is smaller than 2*len + 2 bytes. That is always enough: in non-UTF mode the
16-bit string needs exactly twice the 8-bit size, and a UTF-8 string never
needs more than double when converted to UTF-16, because values up to 0xffff
take at most 3 bytes in UTF-8 and 2 bytes in UTF-16, while higher values take
4 bytes in both.

Surrogate values are deliberately not rejected, so that invalid UTF-16 strings
can be constructed in order to test that they are correctly faulted.

A pattern is either plain ASCII or UTF-8; a data line is always UTF-8 so that
values greater than 255 can be given.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
int needed = 2*len + 2;
int c = 0;
pcre_uint16 *out;

if (needed > buffer16_size)
  {
  free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

/* A non-UTF pattern is widened one byte at a time. */

if (!utf && !data)
  {
  for (; len > 0; len--) *out++ = *p++;
  }

/* Anything else is decoded as UTF-8, one character at a time. */

else
  {
  while (len > 0)
    {
    int used = utf82ord(p, &c);
    if (used <= 0) return -1;
    if (c > 0x10ffff) return -2;
    p += used;
    len -= used;
    if (c >= 0x10000)
      {
      if (!utf) return -3;      /* Needs a surrogate pair */
      c -= 0x10000;
      *out++ = 0xD800 | (c >> 10);
      *out++ = 0xDC00 | (c & 0x3ff);
      }
    else *out++ = c;
    }
  }

*out = 0;
return out - buffer16;
}
#endif
1242    
1243    
1244     /*************************************************
1245 nigel 91 * Read or extend an input line *
1246     *************************************************/
1247    
1248     /* Input lines are read into buffer, but both patterns and data lines can be
1249     continued over multiple input lines. In addition, if the buffer fills up, we
1250     want to automatically expand it so as to be able to handle extremely large
1251     lines that are needed for certain stress tests. When the input buffer is
1252     expanded, the other two buffers must also be expanded likewise, and the
1253     contents of pbuffer, which are a copy of the input for callouts, must be
1254     preserved (for when expansion happens for a data line). This is not the most
1255     optimal way of handling this, but hey, this is just a test program!
1256    
1257     Arguments:
1258     f the file to read
1259     start where in buffer to start (this *must* be within buffer)
1260 ph10 287 prompt for stdin or readline()
1261 nigel 91
1262     Returns: pointer to the start of new data
1263     could be a copy of start, or could be moved
1264     NULL if no data read and EOF reached
1265     */
1266    
1267 ph10 836 static pcre_uint8 *
1268     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269 nigel 91 {
1270 ph10 836 pcre_uint8 *here = start;
1271 nigel 91
1272     for (;;)
1273     {
1274 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
1275 nigel 93
1276 nigel 91 if (rlen > 1000)
1277     {
1278     int dlen;
1279 ph10 289
1280 ph10 287 /* If libreadline support is required, use readline() to read a line if the
1281     input is a terminal. Note that readline() removes the trailing newline, so
1282     we must put it back again, to be compatible with fgets(). */
1283 ph10 289
1284 ph10 287 #ifdef SUPPORT_LIBREADLINE
1285     if (isatty(fileno(f)))
1286     {
1287 ph10 289 size_t len;
1288 ph10 287 char *s = readline(prompt);
1289     if (s == NULL) return (here == start)? NULL : start;
1290     len = strlen(s);
1291 ph10 289 if (len > 0) add_history(s);
1292 ph10 287 if (len > rlen - 1) len = rlen - 1;
1293     memcpy(here, s, len);
1294     here[len] = '\n';
1295 ph10 289 here[len+1] = 0;
1296     free(s);
1297 ph10 287 }
1298 ph10 289 else
1299     #endif
1300    
1301 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1302 ph10 289
1303 ph10 287 {
1304 ph10 516 if (f == stdin) printf("%s", prompt);
1305 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1306     return (here == start)? NULL : start;
1307 ph10 289 }
1308    
1309 nigel 91 dlen = (int)strlen((char *)here);
1310     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1311     here += dlen;
1312     }
1313    
1314     else
1315     {
1316     int new_buffer_size = 2*buffer_size;
1317 ph10 836 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320 nigel 91
1321     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322     {
1323     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1324     exit(1);
1325     }
1326    
1327     memcpy(new_buffer, buffer, buffer_size);
1328     memcpy(new_pbuffer, pbuffer, buffer_size);
1329    
1330     buffer_size = new_buffer_size;
1331    
1332     start = new_buffer + (start - buffer);
1333     here = new_buffer + (here - buffer);
1334    
1335     free(buffer);
1336     free(dbuffer);
1337     free(pbuffer);
1338    
1339     buffer = new_buffer;
1340     dbuffer = new_dbuffer;
1341     pbuffer = new_pbuffer;
1342     }
1343     }
1344    
1345     return NULL; /* Control never gets here */
1346     }
1347    
1348    
1349    
1350     /*************************************************
1351 nigel 63 * Read number from string *
1352     *************************************************/
1353    
1354     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1355     around with conditional compilation, just do the job by hand. It is only used
1356 nigel 93 for unpicking arguments, so just keep it simple.
1357 nigel 63
1358     Arguments:
1359     str string to be converted
1360     endptr where to put the end pointer
1361    
1362     Returns: the unsigned long
1363     */
1364    
1365     static int
1366 ph10 836 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367 nigel 63 {
1368     int result = 0;
1369     while(*str != 0 && isspace(*str)) str++;
1370     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1371     *endptr = str;
1372     return(result);
1373     }
1374    
1375    
1376    
1377 nigel 49 /*************************************************
1378 ph10 836 * Print one character *
1379 nigel 49 *************************************************/
1380    
1381 ph10 836 /* Print a single character either literally, or as a hex escape. */
1382 nigel 49
1383 ph10 836 static int pchar(int c, FILE *f)
1384 nigel 49 {
1385 ph10 836 if (PRINTOK(c))
1386     {
1387     if (f != NULL) fprintf(f, "%c", c);
1388     return 1;
1389     }
1390 nigel 49
1391 ph10 836 if (c < 0x100)
1392 nigel 49 {
1393 ph10 836 if (use_utf)
1394     {
1395     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396     return 6;
1397     }
1398     else
1399     {
1400     if (f != NULL) fprintf(f, "\\x%02x", c);
1401     return 4;
1402     }
1403 nigel 49 }
1404    
1405 ph10 836 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406     return (c <= 0x000000ff)? 6 :
1407     (c <= 0x00000fff)? 7 :
1408     (c <= 0x0000ffff)? 8 :
1409     (c <= 0x000fffff)? 9 : 10;
1410     }
1411 nigel 49
1412    
1413    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Prints each character of the string by means of pchar(). In UTF mode the
bytes are decoded as UTF-8 whenever a well-formed character lies wholly inside
the string; any other byte is printed by itself. A negative length means that
the string is zero-terminated. When the file is NULL nothing is printed and
the characters are just counted.

Returns:     the sum of the widths reported by pchar()
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int remaining = (length < 0)? (int)strlen((char *)p) : length;
int width = 0;
int ch = 0;

while (remaining > 0)
  {
  int used = 0;             /* stays zero until a character has been taken */

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &ch);
    if (rc > 0 && rc <= remaining) used = rc;   /* Mustn't run over the end */
    }
#endif

  if (used == 0)            /* Not UTF, or not decodable: take one byte */
    {
    ch = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  width += pchar(ch, f);
  }

return width;
}
#endif
1452 nigel 49
1453    
1454 nigel 79
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Returns the number of 16-bit units that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif /* SUPPORT_PCRE16 */
1467 nigel 85
1468    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Prints each character of the string by means of pchar(). In UTF mode a high
surrogate (0xD800-0xDBFF) that is directly followed, inside the string, by a
low surrogate (0xDC00-0xDFFF) is combined with it into a single value; any
other unit, an unpaired surrogate included, is printed by itself. A negative
length means that the string is zero-terminated. When the file is NULL nothing
is printed and the characters are just counted.

Returns:     the sum of the widths reported by pchar()
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The low surrogate range includes 0xDFFF itself, so the upper bound must
    be an inclusive test. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif /* SUPPORT_PCRE16 */
1505 nigel 63
1506 ph10 836
1507    
#ifdef SUPPORT_PCRE8
/*************************************************
*    Read a capture name (8-bit) and check it    *
*************************************************/

/* Copies the run of alphanumeric characters at p to the place that *pp points
to, and puts two zeros after it. If the compiled pattern has no group of that
name, a message is written to outfile; the name is kept all the same.

Arguments:
  p          points to the name in the input text
  pp         points to the pointer to where the name is to be put; it is
               advanced to the second of the two zeros
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *out = *pp;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif /* SUPPORT_PCRE8 */
1531 nigel 23
1532 nigel 3
1533 nigel 23
#ifdef SUPPORT_PCRE16
/*************************************************
*   Read a capture name (16-bit) and check it    *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric character at p is
widened to a 16-bit unit and stored at the place that *pp points to, and two
zero units are put after the name. If the compiled pattern has no group of
that name, a message is written to outfile; the name is kept all the same.

Arguments:
  p          points to the name in the (8-bit) input text
  pp         points to the pointer to where the 16-bit name is to be put; it
               is advanced to the second of the two zeros
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *out = *pp;

for (; isalnum(*p); p++) *out++ = *p;
*out++ = 0;
*out = 0;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(*pp, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif /* SUPPORT_PCRE16 */
1558    
1559    
1560    
1561     /*************************************************
1562 nigel 63 * Callout function *
1563     *************************************************/
1564 nigel 3
1565 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1566     the match. Yield zero unless more callouts than the fail count, or the callout
1567     data is not zero. */
1568 nigel 3
1569 nigel 63 static int callout(pcre_callout_block *cb)
1570     {
1571     FILE *f = (first_callout | callout_extra)? outfile : NULL;
1572 nigel 75 int i, pre_start, post_start, subject_length;
1573 nigel 3
1574 nigel 63 if (callout_extra)
1575     {
1576     fprintf(f, "Callout %d: last capture = %d\n",
1577     cb->callout_number, cb->capture_last);
1578 nigel 3
1579 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
1580     {
1581     if (cb->offset_vector[i] < 0)
1582     fprintf(f, "%2d: <unset>\n", i/2);
1583     else
1584     {
1585     fprintf(f, "%2d: ", i/2);
1586 ph10 836 PCHARSV(cb->subject, cb->offset_vector[i],
1587 nigel 63 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588     fprintf(f, "\n");
1589     }
1590     }
1591     }
1592 nigel 3
1593 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
1594     datails. On subsequent calls in the same match, we use pchars just to find the
1595     printed lengths of the substrings. */
1596 nigel 3
1597 nigel 63 if (f != NULL) fprintf(f, "--->");
1598 nigel 3
1599 ph10 836 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600     PCHARS(post_start, cb->subject, cb->start_match,
1601 nigel 63 cb->current_position - cb->start_match, f);
1602 nigel 3
1603 ph10 836 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604 nigel 75
1605 ph10 836 PCHARSV(cb->subject, cb->current_position,
1606 nigel 63 cb->subject_length - cb->current_position, f);
1607 nigel 3
1608 nigel 63 if (f != NULL) fprintf(f, "\n");
1609 nigel 9
1610 nigel 63 /* Always print appropriate indicators, with callout number if not already
1611 nigel 75 shown. For automatic callouts, show the pattern offset. */
1612 nigel 3
1613 nigel 75 if (cb->callout_number == 255)
1614     {
1615     fprintf(outfile, "%+3d ", cb->pattern_position);
1616     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1617     }
1618     else
1619     {
1620     if (callout_extra) fprintf(outfile, " ");
1621     else fprintf(outfile, "%3d ", cb->callout_number);
1622     }
1623 nigel 3
1624 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1625     fprintf(outfile, "^");
1626 nigel 3
1627 nigel 63 if (post_start > 0)
1628     {
1629     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1630     fprintf(outfile, "^");
1631 nigel 3 }
1632    
1633 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1634     fprintf(outfile, " ");
1635    
1636     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1637     pbuffer + cb->pattern_position);
1638    
1639 nigel 63 fprintf(outfile, "\n");
1640     first_callout = 0;
1641 nigel 3
1642 ph10 654 if (cb->mark != last_callout_mark)
1643 ph10 645 {
1644 ph10 836 if (cb->mark == NULL)
1645     fprintf(outfile, "Latest Mark: <unset>\n");
1646     else
1647     {
1648     fprintf(outfile, "Latest Mark: ");
1649     PCHARSV(cb->mark, 0, -1, outfile);
1650     putc('\n', outfile);
1651     }
1652 ph10 654 last_callout_mark = cb->mark;
1653     }
1654 ph10 645
1655 nigel 71 if (cb->callout_data != NULL)
1656 nigel 49 {
1657 nigel 71 int callout_data = *((int *)(cb->callout_data));
1658     if (callout_data != 0)
1659     {
1660     fprintf(outfile, "Callout data = %d\n", callout_data);
1661     return callout_data;
1662     }
1663 nigel 63 }
1664 nigel 49
1665 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
1666     (++callout_count >= callout_fail_count)? 1 : 0;
1667 nigel 3 }
1668    
1669    
1670 nigel 63 /*************************************************
1671 nigel 73 * Local malloc functions *
1672 nigel 63 *************************************************/
1673 nigel 3
1674 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1675 ph10 836 compiled re, which is the first store request that pcre_compile() makes. The
1676     show_malloc variable is set only during matching. */
1677 nigel 3
1678     static void *new_malloc(size_t size)
1679     {
1680 nigel 73 void *block = malloc(size);
1681 nigel 43 gotten_store = size;
1682 ph10 836 if (first_gotten_store == 0) first_gotten_store = size;
1683 nigel 73 if (show_malloc)
1684 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1685 nigel 73 return block;
1686 nigel 3 }
1687    
1688 nigel 73 static void new_free(void *block)
1689     {
1690     if (show_malloc)
1691     fprintf(outfile, "free %p\n", block);
1692     free(block);
1693     }
1694 nigel 3
1695 nigel 73 /* For recursion malloc/free, to test stacking calls */
1696    
1697     static void *stack_malloc(size_t size)
1698     {
1699     void *block = malloc(size);
1700     if (show_malloc)
1701 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1702 nigel 73 return block;
1703     }
1704    
1705     static void stack_free(void *block)
1706     {
1707     if (show_malloc)
1708     fprintf(outfile, "stack_free %p\n", block);
1709     free(block);
1710     }
1711    
1712    
1713 nigel 63 /*************************************************
1714     * Call pcre_fullinfo() *
1715     *************************************************/
1716 nigel 43
1717 ph10 836 /* Get one piece of information from the pcre_fullinfo() function. When only
1718     one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719     value, but the code is defensive.
1720 nigel 43
1721 ph10 836 Arguments:
1722     re compiled regex
1723     study study data
1724     option PCRE_INFO_xxx option
1725     ptr where to put the data
1726    
1727     Returns: 0 when OK, < 0 on error
1728     */
1729    
1730     static int
1731     new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732 nigel 43 {
1733     int rc;
1734 ph10 836
1735     if (use_pcre16)
1736     #ifdef SUPPORT_PCRE16
1737 zherczeg 852 rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738 ph10 836 #else
1739     rc = PCRE_ERROR_BADMODE;
1740     #endif
1741     else
1742     #ifdef SUPPORT_PCRE8
1743     rc = pcre_fullinfo(re, study, option, ptr);
1744     #else
1745     rc = PCRE_ERROR_BADMODE;
1746     #endif
1747    
1748     if (rc < 0)
1749     {
1750     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751     use_pcre16? "16" : "", option);
1752     if (rc == PCRE_ERROR_BADMODE)
1753     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755     }
1756    
1757     return rc;
1758 nigel 43 }
1759    
1760    
1761    
1762 nigel 63 /*************************************************
1763 ph10 836 * Swap byte functions *
1764 nigel 75 *************************************************/
1765    
1766 ph10 836 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767     value, respectively.
1768    
1769     Arguments:
1770     value any number
1771    
1772     Returns: the byte swapped value
1773     */
1774    
/* Reverse the order of the four bytes of a 32-bit value. */

static pcre_uint32
swap_uint32(pcre_uint32 value)
{
pcre_uint32 byte0_to_top = (value & 0x000000ff) << 24;
pcre_uint32 byte1_up = (value & 0x0000ff00) << 8;
pcre_uint32 byte2_down = (value & 0x00ff0000) >> 8;
pcre_uint32 byte3_to_bottom = value >> 24;

return byte0_to_top | byte1_up | byte2_down | byte3_to_bottom;
}
1783    
/* Exchange the two bytes of a 16-bit value. */

static pcre_uint16
swap_uint16(pcre_uint16 value)
{
pcre_uint16 high_to_low = (pcre_uint16)(value >> 8);
pcre_uint16 low_to_high = (pcre_uint16)((value & 0xff) << 8);
return (pcre_uint16)(high_to_low | low_to_high);
}
1789 nigel 75
1790    
1791    
1792     /*************************************************
1793 ph10 836 * Flip bytes in a compiled pattern *
1794     *************************************************/
1795    
1796     /* This function is called if the 'F' option was present on a pattern that is
1797     to be written to a file. We flip the bytes of all the integer fields in the
1798     regex data block and the study block. In 16-bit mode this also flips relevant
1799     bytes in the pattern itself. This is to make it possible to test PCRE's
1800     ability to reload byte-flipped patterns, e.g. those compiled on a different
1801     architecture. */
1802    
1803     static void
1804     regexflip(pcre *ere, pcre_extra *extra)
1805     {
1806 zherczeg 852 REAL_PCRE *re = (REAL_PCRE *)ere;
1807 ph10 836 #ifdef SUPPORT_PCRE16
1808     int op;
1809     pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810     int length = re->name_count * re->name_entry_size;
1811     #ifdef SUPPORT_UTF
1812     BOOL utf = (re->options & PCRE_UTF16) != 0;
1813     BOOL utf16_char = FALSE;
1814     #endif /* SUPPORT_UTF */
1815     #endif /* SUPPORT_PCRE16 */
1816    
1817     /* Always flip the bytes in the main data block and study blocks. */
1818    
1819     re->magic_number = REVERSED_MAGIC_NUMBER;
1820     re->size = swap_uint32(re->size);
1821     re->options = swap_uint32(re->options);
1822     re->flags = swap_uint16(re->flags);
1823     re->top_bracket = swap_uint16(re->top_bracket);
1824     re->top_backref = swap_uint16(re->top_backref);
1825     re->first_char = swap_uint16(re->first_char);
1826     re->req_char = swap_uint16(re->req_char);
1827     re->name_table_offset = swap_uint16(re->name_table_offset);
1828     re->name_entry_size = swap_uint16(re->name_entry_size);
1829     re->name_count = swap_uint16(re->name_count);
1830    
1831     if (extra != NULL)
1832     {
1833     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834     rsd->size = swap_uint32(rsd->size);
1835     rsd->flags = swap_uint32(rsd->flags);
1836     rsd->minlength = swap_uint32(rsd->minlength);
1837     }
1838    
1839     /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840     in the name table, if present, and then in the pattern itself. */
1841    
1842     #ifdef SUPPORT_PCRE16
1843     if (!use_pcre16) return;
1844    
1845     while(TRUE)
1846     {
1847     /* Swap previous characters. */
1848     while (length-- > 0)
1849     {
1850     *ptr = swap_uint16(*ptr);
1851     ptr++;
1852     }
1853     #ifdef SUPPORT_UTF
1854     if (utf16_char)
1855     {
1856     if ((ptr[-1] & 0xfc00) == 0xd800)
1857     {
1858     /* We know that there is only one extra character in UTF-16. */
1859     *ptr = swap_uint16(*ptr);
1860     ptr++;
1861     }
1862     }
1863     utf16_char = FALSE;
1864     #endif /* SUPPORT_UTF */
1865    
1866     /* Get next opcode. */
1867    
1868     length = 0;
1869     op = *ptr;
1870     *ptr++ = swap_uint16(op);
1871    
1872     switch (op)
1873     {
1874     case OP_END:
1875     return;
1876    
1877     #ifdef SUPPORT_UTF
1878     case OP_CHAR:
1879     case OP_CHARI:
1880     case OP_NOT:
1881     case OP_NOTI:
1882     case OP_STAR:
1883     case OP_MINSTAR:
1884     case OP_PLUS:
1885     case OP_MINPLUS:
1886     case OP_QUERY:
1887     case OP_MINQUERY:
1888     case OP_UPTO:
1889     case OP_MINUPTO:
1890     case OP_EXACT:
1891     case OP_POSSTAR:
1892     case OP_POSPLUS:
1893     case OP_POSQUERY:
1894     case OP_POSUPTO:
1895     case OP_STARI:
1896     case OP_MINSTARI:
1897     case OP_PLUSI:
1898     case OP_MINPLUSI:
1899     case OP_QUERYI:
1900     case OP_MINQUERYI:
1901     case OP_UPTOI:
1902     case OP_MINUPTOI:
1903     case OP_EXACTI:
1904     case OP_POSSTARI:
1905     case OP_POSPLUSI:
1906     case OP_POSQUERYI:
1907     case OP_POSUPTOI:
1908     case OP_NOTSTAR:
1909     case OP_NOTMINSTAR:
1910     case OP_NOTPLUS:
1911     case OP_NOTMINPLUS:
1912     case OP_NOTQUERY:
1913     case OP_NOTMINQUERY:
1914     case OP_NOTUPTO:
1915     case OP_NOTMINUPTO:
1916     case OP_NOTEXACT:
1917     case OP_NOTPOSSTAR:
1918     case OP_NOTPOSPLUS:
1919     case OP_NOTPOSQUERY:
1920     case OP_NOTPOSUPTO:
1921     case OP_NOTSTARI:
1922     case OP_NOTMINSTARI:
1923     case OP_NOTPLUSI:
1924     case OP_NOTMINPLUSI:
1925     case OP_NOTQUERYI:
1926     case OP_NOTMINQUERYI:
1927     case OP_NOTUPTOI:
1928     case OP_NOTMINUPTOI:
1929     case OP_NOTEXACTI:
1930     case OP_NOTPOSSTARI:
1931     case OP_NOTPOSPLUSI:
1932     case OP_NOTPOSQUERYI:
1933     case OP_NOTPOSUPTOI:
1934     if (utf) utf16_char = TRUE;
1935     #endif
1936     /* Fall through. */
1937    
1938     default:
1939     length = OP_lengths16[op] - 1;
1940     break;
1941    
1942     case OP_CLASS:
1943     case OP_NCLASS:
1944     /* Skip the character bit map. */
1945     ptr += 32/sizeof(pcre_uint16);
1946     length = 0;
1947     break;
1948    
1949     case OP_XCLASS:
1950 zherczeg 839 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951     if (LINK_SIZE > 1)
1952     length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953     - (1 + LINK_SIZE + 1));
1954     else
1955     length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956    
1957 ph10 836 /* Reverse the size of the XCLASS instance. */
1958 zherczeg 839 *ptr = swap_uint16(*ptr);
1959 ph10 836 ptr++;
1960     if (LINK_SIZE > 1)
1961     {
1962 zherczeg 839 *ptr = swap_uint16(*ptr);
1963 ph10 836 ptr++;
1964     }
1965    
1966     op = *ptr;
1967     *ptr = swap_uint16(op);
1968 zherczeg 839 ptr++;
1969 ph10 836 if ((op & XCL_MAP) != 0)
1970     {
1971     /* Skip the character bit map. */
1972     ptr += 32/sizeof(pcre_uint16);
1973     length -= 32/sizeof(pcre_uint16);
1974     }
1975     break;
1976     }
1977     }
1978     /* Control should never reach here in 16 bit mode. */
1979     #endif /* SUPPORT_PCRE16 */
1980     }
1981    
1982    
1983    
1984     /*************************************************
1985 nigel 87 * Check match or recursion limit *
1986     *************************************************/
1987    
1988     static int
1989 ph10 836 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1991     int flag, unsigned long int *limit, int errnumber, const char *msg)
1992     {
1993     int count;
1994     int min = 0;
1995     int mid = 64;
1996     int max = -1;
1997    
1998     extra->flags |= flag;
1999    
2000     for (;;)
2001     {
2002     *limit = mid;
2003    
2004 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005 nigel 87 use_offsets, use_size_offsets);
2006    
2007     if (count == errnumber)
2008     {
2009     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2010     min = mid;
2011     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2012     }
2013    
2014     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2015     count == PCRE_ERROR_PARTIAL)
2016     {
2017     if (mid == min + 1)
2018     {
2019     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2020     break;
2021     }
2022     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2023     max = mid;
2024     mid = (min + mid)/2;
2025     }
2026     else break; /* Some other error */
2027     }
2028    
2029     extra->flags &= ~flag;
2030     return count;
2031     }
2032    
2033    
2034    
2035     /*************************************************
2036 ph10 227 * Case-independent strncmp() function *
2037     *************************************************/
2038    
/* Compare at most n characters of two zero-terminated strings, ignoring case.
As with strncmp(), the comparison stops at the end of the strings: if both
strings terminate at the same point before n characters have been compared,
they are equal and nothing beyond the terminators is read. A value of n that
is zero or negative compares nothing and gives 0.

Arguments:
  s         first string
  t         second string
  n         maximum number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int cs = tolower(*s++);
  int ct = tolower(*t++);
  if (cs != ct) return cs - ct;
  if (cs == 0) break;    /* Both strings have ended */
  }
return 0;
}
2058    
2059    
2060    
2061     /*************************************************
2062 nigel 91 * Check newline indicator *
2063     *************************************************/
2064    
2065 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2066     a message and return 0 if there is no match.
2067 nigel 91
2068     Arguments:
2069     p points after the leading '<'
2070     f file for error message
2071    
2072     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2073     */
2074    
2075     static int
2076 ph10 836 check_newline(pcre_uint8 *p, FILE *f)
2077 nigel 91 {
2078 ph10 836 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
2086     return 0;
2087     }
2088    
2089    
2090    
2091     /*************************************************
2092 nigel 93 * Usage function *
2093     *************************************************/
2094    
/* Write the command line summary to stdout. Which lines appear depends on the
SUPPORT_LIBREADLINE, SUPPORT_PCRE16, NODFA and NOPOSIX build settings. None of
the text contains a conversion, so it is written with fputs(). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
  "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use the 16-bit library\n", stdout);
#endif

fputs(" -b show compiled code\n"
  " -C show PCRE compile-time options and exit\n"
  " -C arg show a specific compile-time option\n"
  " and exit with its value. The arg can be:\n"
  " linksize internal link size [2, 3, 4]\n"
  " pcre8 8 bit library support enabled [0, 1]\n"
  " pcre16 16 bit library support enabled [0, 1]\n"
  " utf Unicode Transformation Format supported [0, 1]\n"
  " ucp Unicode Properties supported [0, 1]\n"
  " jit Just-in-time compiler supported [0, 1]\n"
  " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
  " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
  " -i show information about compiled patterns\n"
  " -M find MATCH_LIMIT minimum for each subject\n"
  " -m output memory used information\n"
  " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
  " -S <n> set stack size to <n> megabytes\n"
  " -s force each pattern to be studied at basic level\n"
  " -s+ force each pattern to be studied, using JIT if available\n"
  " -t time compilation and execution\n"
  " -t <n> time compilation and execution, repeating <n> times\n"
  " -tm time execution (matching) only\n"
  " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2141    
2142    
2143    
2144     /*************************************************
2145 nigel 63 * Main Program *
2146     *************************************************/
2147 nigel 43
2148 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
2149     consist of a regular expression, in delimiters and optionally followed by
2150     options, followed by a set of test data, terminated by an empty line. */
2151    
2152     int main(int argc, char **argv)
2153     {
2154     FILE *infile = stdin;
2155 ph10 836 const char *version;
2156 nigel 3 int options = 0;
2157     int study_options = 0;
2158 ph10 386 int default_find_match_limit = FALSE;
2159 nigel 3 int op = 1;
2160     int timeit = 0;
2161 nigel 93 int timeitm = 0;
2162 nigel 3 int showinfo = 0;
2163 nigel 31 int showstore = 0;
2164 ph10 667 int force_study = -1;
2165     int force_study_options = 0;
2166 nigel 87 int quiet = 0;
2167 nigel 53 int size_offsets = 45;
2168     int size_offsets_max;
2169 nigel 77 int *offsets = NULL;
2170 nigel 53 #if !defined NOPOSIX
2171 nigel 3 int posix = 0;
2172 nigel 53 #endif
2173 nigel 3 int debug = 0;
2174 nigel 11 int done = 0;
2175 nigel 77 int all_use_dfa = 0;
2176     int yield = 0;
2177 nigel 91 int stack_size;
2178 nigel 3
2179 ph10 667 pcre_jit_stack *jit_stack = NULL;
2180    
2181 ph10 836 /* These vectors store, end-to-end, a list of zero-terminated captured
2182     substring names, each list itself being terminated by an empty name. Assume
2183     that 1024 is plenty long enough for the few names we'll be testing. It is
2184     easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185 ph10 881 for the actual memory, to ensure alignment. */
2186 ph10 667
2187 ph10 836 pcre_uint16 copynames[1024];
2188     pcre_uint16 getnames[1024];
2189 nigel 69
2190 ph10 881 #ifdef SUPPORT_PCRE16
2191 ph10 836 pcre_uint16 *cn16ptr;
2192     pcre_uint16 *gn16ptr;
2193 ph10 881 #endif
2194 nigel 91
2195 ph10 881 #ifdef SUPPORT_PCRE8
2196 ph10 836 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2197     pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2198     pcre_uint8 *cn8ptr;
2199     pcre_uint8 *gn8ptr;
2200 ph10 881 #endif
2201 nigel 91
2202 ph10 836 /* Get buffers from malloc() so that valgrind will check their misuse when
2203     debugging. They grow automatically when very long lines are read. The 16-bit
2204     buffer (buffer16) is obtained only if needed. */
2205 nigel 69
2206 ph10 836 buffer = (pcre_uint8 *)malloc(buffer_size);
2207     dbuffer = (pcre_uint8 *)malloc(buffer_size);
2208     pbuffer = (pcre_uint8 *)malloc(buffer_size);
2209 nigel 69
2210 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
2211 nigel 3
2212 nigel 93 outfile = stdout;
2213    
2214     /* The following _setmode() stuff is some Windows magic that tells its runtime
2215     library to translate CRLF into a single LF character. At least, that's what
2216     I've been told: never having used Windows I take this all on trust. Originally
2217     it set 0x8000, but then I was advised that _O_BINARY was better. */
2218    
2219 nigel 75 #if defined(_WIN32) || defined(WIN32)
2220 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
2221     #endif
2222 nigel 75
2223 ph10 836 /* Get the version number: both pcre_version() and pcre16_version() give the
2224     same answer. We just need to ensure that we call one that is available. */
2225    
2226     #ifdef SUPPORT_PCRE8
2227     version = pcre_version();
2228     #else
2229     version = pcre16_version();
2230     #endif
2231    
2232 nigel 3 /* Scan options */
2233    
2234     while (argc > 1 && argv[op][0] == '-')
2235     {
2236 ph10 836 pcre_uint8 *endptr;
2237 nigel 53
2238 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2239 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2240 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
2241 ph10 667 {
2242     force_study = 1;
2243     force_study_options = PCRE_STUDY_JIT_COMPILE;
2244 ph10 691 }
2245 ph10 836 else if (strcmp(argv[op], "-16") == 0)
2246     {
2247     #ifdef SUPPORT_PCRE16
2248     use_pcre16 = 1;
2249     #else
2250     printf("** This version of PCRE was built without 16-bit support\n");
2251     exit(1);
2252     #endif
2253     }
2254 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2255 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2256 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2257     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2258 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2259 nigel 79 #if !defined NODFA
2260 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2261 nigel 79 #endif
2262 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2263 ph10 836 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2264 nigel 65 *endptr == 0))
2265 nigel 53 {
2266     op++;
2267     argc--;
2268     }
2269 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2270     {
2271     int both = argv[op][2] == 0;
2272     int temp;
2273 ph10 836 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2274 nigel 93 *endptr == 0))
2275     {
2276     timeitm = temp;
2277     op++;
2278     argc--;
2279     }
2280     else timeitm = LOOPREPEAT;
2281     if (both) timeit = timeitm;
2282     }
2283 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2284 ph10 836 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2285 nigel 91 *endptr == 0))
2286     {
2287 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2288 nigel 91 printf("PCRE: -S not supported on this OS\n");
2289     exit(1);
2290     #else
2291     int rc;
2292     struct rlimit rlim;
2293     getrlimit(RLIMIT_STACK, &rlim);
2294     rlim.rlim_cur = stack_size * 1024 * 1024;
2295     rc = setrlimit(RLIMIT_STACK, &rlim);
2296     if (rc != 0)
2297     {
2298     printf("PCRE: setrlimit() failed with error %d\n", rc);
2299     exit(1);
2300     }
2301     op++;
2302     argc--;
2303     #endif
2304     }
2305 nigel 53 #if !defined NOPOSIX
2306 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2307 nigel 53 #endif
2308 nigel 63 else if (strcmp(argv[op], "-C") == 0)
2309     {
2310     int rc;
2311 ph10 392 unsigned long int lrc;
2312 ph10 836
2313     if (argc > 2)
2314     {
2315     if (strcmp(argv[op + 1], "linksize") == 0)
2316     {
2317     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2318     printf("%d\n", rc);
2319     yield = rc;
2320     goto EXIT;
2321     }
2322     if (strcmp(argv[op + 1], "pcre8") == 0)
2323     {
2324     #ifdef SUPPORT_PCRE8
2325     printf("1\n");
2326     yield = 1;
2327     #else
2328     printf("0\n");
2329     yield = 0;
2330     #endif
2331     goto EXIT;
2332     }
2333     if (strcmp(argv[op + 1], "pcre16") == 0)
2334     {
2335     #ifdef SUPPORT_PCRE16
2336     printf("1\n");
2337     yield = 1;
2338     #else
2339     printf("0\n");
2340     yield = 0;
2341     #endif
2342     goto EXIT;
2343     }
2344     if (strcmp(argv[op + 1], "utf") == 0)
2345     {
2346     #ifdef SUPPORT_PCRE8
2347     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2348     printf("%d\n", rc);
2349     yield = rc;
2350     #else
2351     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2352     printf("%d\n", rc);
2353     yield = rc;
2354     #endif
2355     goto EXIT;
2356     }
2357     if (strcmp(argv[op + 1], "ucp") == 0)
2358     {
2359     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2360     printf("%d\n", rc);
2361     yield = rc;
2362     goto EXIT;
2363     }
2364     if (strcmp(argv[op + 1], "jit") == 0)
2365     {
2366     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2367     printf("%d\n", rc);
2368     yield = rc;
2369     goto EXIT;
2370     }
2371 ph10 838 if (strcmp(argv[op + 1], "newline") == 0)
2372 ph10 842 {
2373 ph10 838 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2374     /* Note that these values are always the ASCII values, even
2375     in EBCDIC environments. CR is 13 and NL is 10. */
2376     printf("%s\n", (rc == 13)? "CR" :
2377     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2378     (rc == -2)? "ANYCRLF" :
2379     (rc == -1)? "ANY" : "???");
2380     goto EXIT;
2381 ph10 842 }
2382 ph10 838 printf("Unknown -C option: %s\n", argv[op + 1]);
2383 ph10 836 goto EXIT;
2384     }
2385    
2386     printf("PCRE version %s\n", version);
2387 nigel 63 printf("Compiled with\n");
2388 ph10 836
2389     /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2390     are set, either both UTFs are supported or both are not supported. */
2391    
2392     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2393     printf(" 8-bit and 16-bit support\n");
2394 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2395 ph10 836 if (rc)
2396     printf(" UTF-8 and UTF-16 support\n");
2397     else
2398     printf(" No UTF-8 or UTF-16 support\n");
2399     #elif defined SUPPORT_PCRE8
2400     printf(" 8-bit support only\n");
2401     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2402 nigel 63 printf(" %sUTF-8 support\n", rc? "" : "No ");
2403 ph10 836 #else
2404     printf(" 16-bit support only\n");
2405     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2406     printf(" %sUTF-16 support\n", rc? "" : "No ");
2407     #endif
2408    
2409     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2410 nigel 75 printf(" %sUnicode properties support\n", rc? "" : "No ");
2411 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2412 ph10 674 if (rc)
2413 ph10 890 {
2414     const char *arch;
2415     (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);
2416     printf(" Just-in-time compiler support: %s\n", arch);
2417     }
2418 ph10 674 else
2419     printf(" No just-in-time compiler support\n");
2420 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2421 ph10 391 /* Note that these values are always the ASCII values, even
2422 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
2423 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2424     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2425 ph10 150 (rc == -2)? "ANYCRLF" :
2426 nigel 93 (rc == -1)? "ANY" : "???");
2427 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2428 ph10 231 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2429     "all Unicode newlines");
2430 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2431 nigel 63 printf(" Internal link size = %d\n", rc);
2432 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2433 nigel 63 printf(" POSIX malloc threshold = %d\n", rc);
2434 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2435 ph10 376 printf(" Default match limit = %ld\n", lrc);
2436 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2437 ph10 376 printf(" Default recursion depth limit = %ld\n", lrc);
2438 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2439 ph10 893 printf(" Match recursion uses %s: ", rc? "stack" : "heap");
2440     PCRE_EXEC(rc, NULL, NULL, NULL, -1, -1, 0, NULL, 0);
2441     printf("frame size = %d bytes\n", -rc);
2442 ph10 121 goto EXIT;
2443 nigel 63 }
2444 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
2445     strcmp(argv[op], "--help") == 0)
2446     {
2447     usage();
2448     goto EXIT;
2449     }
2450 nigel 3 else
2451     {
2452 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
2453 nigel 93 usage();
2454 nigel 77 yield = 1;
2455     goto EXIT;
2456 nigel 3 }
2457     op++;
2458     argc--;
2459     }
2460    
2461 nigel 53 /* Get the store for the offsets vector, and remember what it was */
2462    
2463     size_offsets_max = size_offsets;
2464 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2465 nigel 53 if (offsets == NULL)
2466     {
2467     printf("** Failed to get %d bytes of memory for offsets vector\n",
2468 ph10 151 (int)(size_offsets_max * sizeof(int)));
2469 nigel 77 yield = 1;
2470     goto EXIT;
2471 nigel 53 }
2472    
2473 nigel 3 /* Sort out the input and output files */
2474    
2475     if (argc > 1)
2476     {
2477 nigel 93 infile = fopen(argv[op], INPUT_MODE);
2478 nigel 3 if (infile == NULL)
2479     {
2480     printf("** Failed to open %s\n", argv[op]);
2481 nigel 77 yield = 1;
2482     goto EXIT;
2483 nigel 3 }
2484     }
2485    
2486     if (argc > 2)
2487     {
2488 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
2489 nigel 3 if (outfile == NULL)
2490     {
2491     printf("** Failed to open %s\n", argv[op+1]);
2492 nigel 77 yield = 1;
2493     goto EXIT;
2494 nigel 3 }
2495     }
2496    
2497     /* Set alternative malloc function */
2498    
2499 ph10 836 #ifdef SUPPORT_PCRE8
2500 nigel 3 pcre_malloc = new_malloc;
2501 nigel 73 pcre_free = new_free;
2502     pcre_stack_malloc = stack_malloc;
2503     pcre_stack_free = stack_free;
2504 ph10 836 #endif
2505 nigel 3
2506 ph10 836 #ifdef SUPPORT_PCRE16
2507     pcre16_malloc = new_malloc;
2508     pcre16_free = new_free;
2509     pcre16_stack_malloc = stack_malloc;
2510     pcre16_stack_free = stack_free;
2511     #endif
2512    
2513 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
2514 nigel 3
2515 ph10 836 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2516 nigel 3
2517     /* Main loop */
2518    
2519 nigel 11 while (!done)
2520 nigel 3 {
2521     pcre *re = NULL;
2522     pcre_extra *extra = NULL;
2523 nigel 37
2524     #if !defined NOPOSIX /* There are still compilers that require no indent */
2525 nigel 3 regex_t preg;
2526 nigel 45 int do_posix = 0;
2527 nigel 37 #endif
2528    
2529 nigel 7 const char *error;
2530 ph10 836 pcre_uint8 *markptr;
2531     pcre_uint8 *p, *pp, *ppp;
2532     pcre_uint8 *to_file = NULL;
2533     const pcre_uint8 *tables = NULL;
2534 zherczeg 847 unsigned long int get_options;
2535 nigel 75 unsigned long int true_size, true_study_size = 0;
2536     size_t size, regex_gotten_store;
2537 ph10 654 int do_allcaps = 0;
2538 ph10 512 int do_mark = 0;
2539 nigel 3 int do_study = 0;
2540 ph10 654 int no_force_study = 0;
2541 nigel 25 int do_debug = debug;
2542 nigel 35 int do_G = 0;
2543     int do_g = 0;
2544 nigel 25 int do_showinfo = showinfo;
2545 nigel 35 int do_showrest = 0;
2546 ph10 616 int do_showcaprest = 0;
2547 nigel 75 int do_flip = 0;
2548 nigel 93 int erroroffset, len, delimiter, poffset;
2549 nigel 3
2550 ph10 836 use_utf = 0;
2551 ph10 211 debug_lengths = 1;
2552 nigel 63
2553 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2554 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2555 nigel 63 fflush(outfile);
2556 nigel 3
2557     p = buffer;
2558     while (isspace(*p)) p++;
2559     if (*p == 0) continue;
2560    
2561 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
2562 nigel 3
2563 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2564     {
2565 zherczeg 841 pcre_uint32 magic;
2566 ph10 836 pcre_uint8 sbuf[8];
2567 nigel 75 FILE *f;
2568    
2569     p++;
2570 zherczeg 839 if (*p == '!')
2571     {
2572     do_debug = TRUE;
2573     do_showinfo = TRUE;
2574     p++;
2575     }
2576    
2577 nigel 75 pp = p + (int)strlen((char *)p);
2578     while (isspace(pp[-1])) pp--;
2579     *pp = 0;
2580    
2581     f = fopen((char *)p, "rb");
2582     if (f == NULL)
2583     {
2584     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2585     continue;
2586     }
2587    
2588 zherczeg 839 first_gotten_store = 0;
2589 nigel 75 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2590    
2591     true_size =
2592     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2593     true_study_size =
2594     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2595    
2596 zherczeg 852 re = (pcre *)new_malloc(true_size);
2597 ph10 836 regex_gotten_store = first_gotten_store;
2598 nigel 75
2599     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2600    
2601 zherczeg 852 magic = ((REAL_PCRE *)re)->magic_number;
2602 nigel 75 if (magic != MAGIC_NUMBER)
2603     {
2604 ph10 836 if (swap_uint32(magic) == MAGIC_NUMBER)
2605 nigel 75 {
2606     do_flip = 1;
2607     }
2608     else
2609     {
2610     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2611     fclose(f);
2612     continue;
2613     }
2614     }
2615    
2616 zherczeg 839 /* We hide the byte-invert info for little and big endian tests. */
2617 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2618 zherczeg 839 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2619 nigel 75
2620 ph10 612 /* Now see if there is any following study data. */
2621 nigel 75
2622     if (true_study_size != 0)
2623     {
2624     pcre_study_data *psd;
2625    
2626     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2627     extra->flags = PCRE_EXTRA_STUDY_DATA;
2628    
2629     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2630     extra->study_data = psd;
2631    
2632     if (fread(psd, 1, true_study_size, f) != true_study_size)
2633     {
2634     FAIL_READ:
2635     fprintf(outfile, "Failed to read data from %s\n", p);
2636 ph10 836 if (extra != NULL)
2637     {
2638     PCRE_FREE_STUDY(extra);
2639     }
2640 nigel 75 if (re != NULL) new_free(re);
2641     fclose(f);
2642     continue;
2643     }
2644     fprintf(outfile, "Study data loaded from %s\n", p);
2645     do_study = 1; /* To get the data output if requested */
2646     }
2647     else fprintf(outfile, "No study data\n");
2648    
2649 ph10 836 /* Flip the necessary bytes. */
2650     if (do_flip)
2651     {
2652 zherczeg 839 int rc;
2653     PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2654     if (rc == PCRE_ERROR_BADMODE)
2655     {
2656     /* Simulate the result of the function call below. */
2657     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2658     use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2659     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2660     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2661     continue;
2662     }
2663 ph10 836 }
2664    
2665     /* Need to know if UTF-8 for printing data strings. */
2666    
2667     if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2668     use_utf = (get_options & PCRE_UTF8) != 0;
2669    
2670 nigel 75 fclose(f);
2671     goto SHOW_INFO;
2672     }
2673    
2674     /* In-line pattern (the usual case). Get the delimiter and seek the end of
2675 ph10 836 the pattern; if it isn't complete, read more. */
2676 nigel 75
2677 nigel 3 delimiter = *p++;
2678    
2679 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
2680 nigel 3 {
2681 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2682 nigel 3 goto SKIP_DATA;
2683     }
2684    
2685     pp = p;
2686 ph10 530 poffset = (int)(p - buffer);
2687 nigel 3
2688     for(;;)
2689     {
2690 nigel 29 while (*pp != 0)
2691     {
2692     if (*pp == '\\' && pp[1] != 0) pp++;
2693     else if (*pp == delimiter) break;
2694     pp++;
2695     }
2696 nigel 3 if (*pp != 0) break;
2697 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2698 nigel 3 {
2699     fprintf(outfile, "** Unexpected EOF\n");
2700 nigel 11 done = 1;
2701     goto CONTINUE;
2702 nigel 3 }
2703 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2704 nigel 3 }
2705    
2706 nigel 93 /* The buffer may have moved while being extended; reset the start of data
2707     pointer to the correct relative point in the buffer. */
2708    
2709     p = buffer + poffset;
2710    
2711 nigel 29 /* If the first character after the delimiter is backslash, make
2712     the pattern end with backslash. This is purely to provide a way
2713     of testing for the error message when a pattern ends with backslash. */
2714    
2715     if (pp[1] == '\\') *pp++ = '\\';
2716    
2717 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2718     for callouts. */
2719 nigel 3
2720     *pp++ = 0;
2721 nigel 75 strcpy((char *)pbuffer, (char *)p);
2722 nigel 3
2723     /* Look for options after final delimiter */
2724    
2725     options = 0;
2726 ph10 836 study_options = 0;
2727 nigel 31 log_store = showstore; /* default from command line */
2728    
2729 nigel 3 while (*pp != 0)
2730     {
2731     switch (*pp++)
2732     {
2733 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
2734 nigel 35 case 'g': do_g = 1; break;
2735 nigel 3 case 'i': options |= PCRE_CASELESS; break;
2736     case 'm': options |= PCRE_MULTILINE; break;
2737     case 's': options |= PCRE_DOTALL; break;
2738     case 'x': options |= PCRE_EXTENDED; break;
2739 nigel 25
2740 ph10 616 case '+':
2741 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2742 ph10 616 break;
2743 ph10 654
2744     case '=': do_allcaps = 1; break;
2745 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
2746 nigel 93 case 'B': do_debug = 1; break;
2747 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2748 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
2749 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2750 nigel 75 case 'F': do_flip = 1; break;
2751 nigel 35 case 'G': do_G = 1; break;
2752 nigel 25 case 'I': do_showinfo = 1; break;
2753 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
2754 ph10 512 case 'K': do_mark = 1; break;
2755 nigel 31 case 'M': log_store = 1; break;
2756 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2757 nigel 37
2758     #if !defined NOPOSIX
2759 nigel 3 case 'P': do_posix = 1; break;
2760 nigel 37 #endif
2761    
2762 ph10 654 case 'S':
2763 ph10 691 if (do_study == 0)
2764 ph10 612 {
2765 ph10 691 do_study = 1;
2766 ph10 667 if (*pp == '+')
2767     {
2768     study_options |= PCRE_STUDY_JIT_COMPILE;
2769 ph10 691 pp++;
2770     }
2771     }
2772 ph10 667 else
2773     {
2774 ph10 612 do_study = 0;
2775     no_force_study = 1;
2776 ph10 654 }
2777 ph10 612 break;
2778    
2779 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
2780 ph10 535 case 'W': options |= PCRE_UCP; break;
2781 nigel 3 case 'X': options |= PCRE_EXTRA; break;
2782 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2783 ph10 126 case 'Z': debug_lengths = 0; break;
2784 ph10 836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2785 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2786 ph10 545
2787 ph10 541 case 'T':
2788     switch (*pp++)
2789     {
2790     case '0': tables = tables0; break;
2791     case '1': tables = tables1; break;
2792 ph10 545
2793 ph10 541 case '\r':
2794     case '\n':
2795 ph10 545 case ' ':
2796     case 0:
2797 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
2798 ph10 545 goto SKIP_DATA;
2799    
2800     default:
2801 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2802 ph10 545 goto SKIP_DATA;
2803 ph10 541 }
2804 ph10 545 break;
2805 nigel 25
2806     case 'L':
2807     ppp = pp;
2808 nigel 93 /* The '\r' test here is so that it works on Windows. */
2809     /* The 0 (NUL) test is just in case this is an unterminated line. */
2810     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2811 nigel 25 *ppp = 0;
2812     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2813     {
2814     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2815     goto SKIP_DATA;
2816     }
2817 nigel 93 locale_set = 1;
2818 ph10 836 tables = PCRE_MAKETABLES;
2819 nigel 25 pp = ppp;
2820     break;
2821    
2822 nigel 75 case '>':
2823     to_file = pp;
2824     while (*pp != 0) pp++;
2825     while (isspace(pp[-1])) pp--;
2826     *pp = 0;
2827     break;
2828    
2829 nigel 91 case '<':
2830     {
2831 ph10 836 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2832 ph10 336 {
2833     options |= PCRE_JAVASCRIPT_COMPAT;
2834 ph10 345 pp += 3;
2835 ph10 336 }
2836     else
2837 ph10 345 {
2838 ph10 336 int x = check_newline(pp, outfile);
2839     if (x == 0) goto SKIP_DATA;
2840     options |= x;
2841     while (*pp++ != '>');
2842 ph10 345 }
2843 nigel 91 }
2844     break;
2845    
2846 nigel 77 case '\r': /* So that it works in Windows */
2847     case '\n':
2848     case ' ':
2849     break;
2850 nigel 75
2851 nigel 3 default:
2852     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2853     goto SKIP_DATA;
2854     }
2855     }
2856    
2857 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
2858 nigel 25 timing, showing, or debugging options, nor the ability to pass over
2859 ph10 836 local character tables. Neither does it have 16-bit support. */
2860 nigel 3
2861 nigel 37 #if !defined NOPOSIX
2862 nigel 3 if (posix || do_posix)
2863     {
2864     int rc;
2865     int cflags = 0;
2866 nigel 75
2867 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2868     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2869 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2870 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2871     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2872 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2873 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2874 nigel 87
2875 ph10 836 first_gotten_store = 0;
2876 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
2877    
2878     /* Compilation failed; go back for another re, skipping to blank line
2879     if non-interactive. */
2880    
2881     if (rc != 0)
2882     {
2883 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2884 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2885     goto SKIP_DATA;
2886     }
2887     }
2888    
2889     /* Handle compiling via the native interface */
2890    
2891     else
2892 nigel 37 #endif /* !defined NOPOSIX */
2893    
2894 nigel 3 {
2895 ph10 836 /* In 16-bit mode, convert the input. */
2896    
2897     #ifdef SUPPORT_PCRE16
2898     if (use_pcre16)
2899     {
2900     switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2901     {
2902     case -1:
2903     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2904     "converted to UTF-16\n");
2905     goto SKIP_DATA;
2906    
2907     case -2:
2908     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2909     "cannot be converted to UTF-16\n");
2910     goto SKIP_DATA;
2911 ph10 842
2912 ph10 836 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2913     fprintf(outfile, "**Failed: character value greater than 0xffff "
2914     "cannot be converted to 16-bit in non-UTF mode\n");
2915 ph10 842 goto SKIP_DATA;
2916 ph10 836
2917     default:
2918     break;
2919     }
2920     p = (pcre_uint8 *)buffer16;
2921     }
2922     #endif
2923    
2924     /* Compile many times when timing */
2925    
2926 nigel 93 if (timeit > 0)
2927 nigel 3 {
2928     register int i;
2929     clock_t time_taken;
2930     clock_t start_time = clock();
2931 nigel 93 for (i = 0; i < timeit; i++)
2932 nigel 3 {
2933 ph10 836 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2934 nigel 3 if (re != NULL) free(re);
2935     }
2936     time_taken = clock() - start_time;
2937 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
2938     (((double)time_taken * 1000.0) / (double)timeit) /
2939 nigel 63 (double)CLOCKS_PER_SEC);
2940 nigel 3 }
2941    
2942 ph10 836 first_gotten_store = 0;
2943     PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2944 nigel 3
2945     /* Compilation failed; go back for another re, skipping to blank line
2946     if non-interactive. */
2947    
2948     if (re == NULL)
2949     {
2950     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2951     SKIP_DATA:
2952     if (infile != stdin)
2953     {
2954     for (;;)
2955     {
2956 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
2957 nigel 11 {
2958     done = 1;
2959     goto CONTINUE;
2960     }
2961 nigel 3 len = (int)strlen((char *)buffer);
2962     while (len > 0 && isspace(buffer[len-1])) len--;
2963     if (len == 0) break;
2964     }
2965     fprintf(outfile, "\n");
2966     }
2967 nigel 25 goto CONTINUE;
2968 nigel 3 }
2969 ph10 416
2970     /* Compilation succeeded. It is now possible to set the UTF-8 option from
2971     within the regex; check for this so that we know how to process the data
2972 ph10 412 lines. */
2973 ph10 416
2974 ph10 836 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2975     goto SKIP_DATA;
2976     if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2977 nigel 3
2978 ph10 836 /* Extract the size for possible writing before possibly flipping it,
2979     and remember the store that was got. */
2980 nigel 3
2981 zherczeg 852 true_size = ((REAL_PCRE *)re)->size;
2982 ph10 836 regex_gotten_store = first_gotten_store;
2983    
2984     /* Output code size information if requested */
2985    
2986 nigel 63 if (log_store)
2987     fprintf(outfile, "Memory allocation (code space): %d\n",
2988 ph10 836 (int)(first_gotten_store -
2989 zherczeg 852 sizeof(REAL_PCRE) -
2990     ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2991 nigel 63
2992 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
2993 ph10 654 help with the matching, unless the pattern has the SS option, which
2994 ph10 612 suppresses studying altogether, even when it is forced by -s (used for a
2995     few test patterns where studying is never sensible). */
2996 nigel 75
2997 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
2998 nigel 75 {
2999 nigel 93 if (timeit > 0)
3000 nigel 75 {
3001     register int i;
3002     clock_t time_taken;
3003     clock_t start_time = clock();
3004 nigel 93 for (i = 0; i < timeit; i++)
3005 ph10 836 {
3006     PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3007     }
3008 nigel 75 time_taken = clock() - start_time;
3009 ph10 836 if (extra != NULL)
3010     {
3011     PCRE_FREE_STUDY(extra);
3012     }
3013 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
3014     (((double)time_taken * 1000.0) / (double)timeit) /
3015 nigel 75 (double)CLOCKS_PER_SEC);
3016     }
3017 ph10 836 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3018 nigel 75 if (error != NULL)
3019     fprintf(outfile, "Failed to study: %s\n", error);
3020     else if (extra != NULL)
3021 ph10 836 {
3022 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3023 ph10 836 if (log_store)
3024     {
3025     size_t jitsize;
3026     if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3027     jitsize != 0)
3028     fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3029     }
3030     }
3031 nigel 75 }
3032 ph10 788
3033 ph10 510 /* If /K was present, we set up for handling MARK data. */
3034 ph10 512
3035 ph10 510 if (do_mark)
3036     {
3037     if (extra == NULL)
3038     {
3039     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3040     extra->flags = 0;
3041     }
3042 ph10 512 extra->mark = &markptr;
3043 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
3044 ph10 512 }
3045 nigel 75
3046 ph10 836 /* Extract and display information from the compiled data if required. */
3047 nigel 75
3048     SHOW_INFO:
3049    
3050 nigel 93 if (do_debug)
3051     {
3052     fprintf(outfile, "------------------------------------------------------------------\n");
3053 ph10 836 PCRE_PRINTINT(re, outfile, debug_lengths);
3054 nigel 93 }
3055 ph10 416
3056 ph10 412 /* We already have the options in get_options (see above) */
3057 nigel 93
3058 nigel 25 if (do_showinfo)
3059 nigel 3 {
3060 ph10 412 unsigned long int all_options;
3061 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3062 ph10 227 hascrorlf;
3063 nigel 63 int nameentrysize, namecount;
3064 ph10 836 const pcre_uint8 *nametable;
3065 nigel 3
3066 ph10 836 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3067     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3068     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3069     new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3070     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3071     new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3072     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3073     new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3074     new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3075     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3076     new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3077     != 0)
3078     goto SKIP_DATA;
3079 nigel 43
3080 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
3081 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3082 nigel 77 (int)size, (int)regex_gotten_store);
3083 nigel 43
3084     fprintf(outfile, "Capturing subpattern count = %d\n", count);
3085     if (backrefmax > 0)
3086     fprintf(outfile, "Max back reference = %d\n", backrefmax);
3087 nigel 63
3088     if (namecount > 0)
3089     {
3090     fprintf(outfile, "Named capturing subpatterns:\n");
3091     while (namecount-- > 0)
3092     {
3093 ph10 836 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3094     int imm2_size = use_pcre16 ? 1 : 2;
3095     #else
3096     int imm2_size = IMM2_SIZE;
3097     #endif
3098     int length = (int)STRLEN(nametable + imm2_size);
3099     fprintf(outfile, " ");
3100     PCHARSV(nametable, imm2_size, length, outfile);
3101     while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3102     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3103     fprintf(outfile, "%3d\n", use_pcre16?
3104     (int)(((PCRE_SPTR16)nametable)[0])
3105     :((int)nametable[0] << 8) | (int)nametable[1]);
3106     nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3107     #else
3108     fprintf(outfile, "%3d\n", GET2(nametable, 0));
3109     #ifdef SUPPORT_PCRE8
3110 nigel 63 nametable += nameentrysize;
3111 ph10 836 #else
3112     nametable += nameentrysize * 2;
3113     #endif
3114     #endif
3115 nigel 63 }
3116     }
3117 ph10 172
3118 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3119 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3120 nigel 63
3121 zherczeg 852 all_options = ((REAL_PCRE *)re)->options;
3122 ph10 836 if (do_flip) all_options = swap_uint32(all_options);
3123 nigel 75
3124 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
3125 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3126 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3127     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3128     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3129     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3130 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3131 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3132 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3133     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3134 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3135     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3136     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3137 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3138 ph10 836 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3139 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3140 ph10 836 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3141 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3142 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3143 ph10 172
3144 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3145 nigel 43
3146 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
3147 nigel 91 {
3148     case PCRE_NEWLINE_CR:
3149     fprintf(outfile, "Forced newline sequence: CR\n");
3150     break;
3151 nigel 43
3152 nigel 91 case PCRE_NEWLINE_LF:
3153     fprintf(outfile, "Forced newline sequence: LF\n");
3154     break;
3155    
3156     case PCRE_NEWLINE_CRLF:
3157     fprintf(outfile, "Forced newline sequence: CRLF\n");
3158     break;
3159    
3160 ph10 149 case PCRE_NEWLINE_ANYCRLF:
3161     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3162     break;
3163    
3164 nigel 93 case PCRE_NEWLINE_ANY:
3165     fprintf(outfile, "Forced newline sequence: ANY\n");
3166     break;
3167    
3168 nigel 91 default:
3169     break;
3170     }
3171    
3172 nigel 43 if (first_char == -1)
3173     {
3174 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
3175 nigel 43 }
3176     else if (first_char < 0)
3177     {
3178     fprintf(outfile, "No first char\n");
3179     }
3180     else
3181     {
3182 ph10 836 const char *caseless =
3183 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3184 nigel 63 "" : " (caseless)";
3185 ph10 836
3186     if (PRINTOK(first_char))
3187     fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3188 nigel 3 else
3189 ph10 836 {
3190     fprintf(outfile, "First char = ");
3191     pchar(first_char, outfile);
3192     fprintf(outfile, "%s\n", caseless);
3193     }
3194 nigel 43 }
3195 nigel 37
3196 nigel 43 if (need_char < 0)
3197     {
3198     fprintf(outfile, "No need char\n");
3199 nigel 3 }
3200 nigel 43 else
3201     {
3202 ph10 836 const char *caseless =
3203 zherczeg 852 ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3204 nigel 63 "" : " (caseless)";
3205 ph10 836
3206     if (PRINTOK(need_char))
3207     fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3208 nigel 43 else
3209 ph10 836 {
3210     fprintf(outfile, "Need char = ");
3211     pchar(need_char, outfile);
3212     fprintf(outfile, "%s\n", caseless);
3213     }
3214 nigel 43 }
3215 nigel 75
3216     /* Don't output study size; at present it is in any case a fixed
3217     value, but it varies, depending on the computer architecture, and
3218     so messes up the test suite. (And with the /F option, it might be
3219 ph10 654 flipped.) If study was forced by an external -s, don't show this
3220 ph10 612 information unless -i or -d was also present. This means that, except
3221     when auto-callouts are involved, the output from runs with and without
3222     -s should be identical. */
3223 nigel 75
3224 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3225 nigel 75 {
3226     if (extra == NULL)
3227     fprintf(outfile, "Study returned NULL\n");
3228     else
3229     {
3230 ph10 836 pcre_uint8 *start_bits = NULL;
3231 ph10 455 int minlength;
3232 ph10 461
3233 ph10 836 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3234     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3235 ph10 461
3236 ph10 836 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3237 nigel 75 {
3238 ph10 836 if (start_bits == NULL)
3239     fprintf(outfile, "No set of starting bytes\n");
3240     else
3241 nigel 75 {
3242 ph10 836 int i;
3243     int c = 24;
3244     fprintf(outfile, "Starting byte set: ");
3245     for (i = 0; i < 256; i++)
3246 nigel 75 {
3247 ph10 836 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3248 nigel 75 {
3249 ph10 836 if (c > 75)
3250     {
3251     fprintf(outfile, "\n ");
3252     c = 2;
3253     }
3254     if (PRINTOK(i) && i != ' ')
3255     {
3256     fprintf(outfile, "%c ", i);
3257     c += 2;
3258     }
3259     else
3260     {
3261     fprintf(outfile, "\\x%02x ", i);
3262     c += 5;
3263     }
3264 nigel 75 }
3265     }
3266 ph10 836 fprintf(outfile, "\n");
3267 nigel 75 }
3268     }
3269     }
3270 ph10 691
3271 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
3272 ph10 691
3273 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3274     {
3275 ph10 691 int jit;
3276 ph10 836 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3277     {
3278     if (jit)
3279     fprintf(outfile, "JIT study was successful\n");
3280     else
3281 ph10 691 #ifdef SUPPORT_JIT
3282 ph10 836 fprintf(outfile, "JIT study was not successful\n");
3283 ph10 667 #else
3284 ph10 836 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3285 ph10 667 #endif
3286 ph10 836 }
3287 ph10 691 }
3288 nigel 75 }
3289 nigel 3 }
3290    
3291 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
3292     that is all. The first 8 bytes of the file are the regex length and then
3293     the study length, in big-endian order. */
3294 nigel 3
3295 nigel 75 if (to_file != NULL)
3296 nigel 3 {
3297 nigel 75 FILE *f = fopen((char *)to_file, "wb");
3298     if (f == NULL)
3299 nigel 3 {
3300 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3301 nigel 3 }
3302 nigel 75 else
3303     {
3304 ph10 836 pcre_uint8 sbuf[8];
3305 ph10 259
3306 ph10 836 if (do_flip) regexflip(re, extra);
3307     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3308     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3309     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3310     sbuf[3] = (pcre_uint8)((true_size) & 255);
3311     sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3312     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3313     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3314     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3315 nigel 3
3316 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
3317     fwrite(re, 1, true_size, f) < true_size)
3318     {
3319     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3320     }
3321 nigel 3 else
3322     {
3323 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3324 ph10 654
3325 ph10 658 /* If there is study data, write it. */
3326 ph10 654
3327 nigel 75 if (extra != NULL)
3328 nigel 3 {
3329 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
3330     true_study_size)
3331 nigel 3 {
3332 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
3333     strerror(errno));
3334 nigel 3 }
3335 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
3336 nigel 3 }
3337     }
3338 nigel 75 fclose(f);
3339 nigel 3 }
3340 nigel 77
3341     new_free(re);
3342 ph10 836 if (extra != NULL)
3343     {
3344     PCRE_FREE_STUDY(extra);
3345     }
3346 ph10 545 if (locale_set)
3347 ph10 541 {
3348     new_free((void *)tables);
3349     setlocale(LC_CTYPE, "C");
3350 ph10 545 locale_set = 0;
3351     }
3352 nigel 75 continue; /* With next regex */
3353 nigel 3 }
3354 nigel 75 } /* End of non-POSIX compile */
3355 nigel 3
3356     /* Read data lines and test them */
3357    
3358     for (;;)
3359     {
3360 ph10 836 pcre_uint8 *q;
3361     pcre_uint8 *bptr;
3362 nigel 57 int *use_offsets = offsets;
3363 nigel 53 int use_size_offsets = size_offsets;
3364 nigel 63 int callout_data = 0;
3365     int callout_data_set = 0;
3366 nigel 3 int count, c;
3367 nigel 29 int copystrings = 0;
3368 ph10 386 int find_match_limit = default_find_match_limit;
3369 nigel 29 int getstrings = 0;
3370     int getlist = 0;
3371 nigel 39 int gmatched = 0;
3372 nigel 35 int start_offset = 0;
3373 ph10 579 int start_offset_sign = 1;
3374 nigel 41 int g_notempty = 0;
3375 nigel 77 int use_dfa = 0;
3376 nigel 3
3377 nigel 91 *copynames = 0;
3378     *getnames = 0;
3379    
3380 ph10 881 #ifdef SUPPORT_PCRE16
3381 ph10 836 cn16ptr = copynames;
3382     gn16ptr = getnames;
3383 ph10 881 #endif
3384     #ifdef SUPPORT_PCRE8
3385 ph10 836 cn8ptr = copynames8;
3386     gn8ptr = getnames8;
3387 ph10 881 #endif
3388 nigel 91
3389 ph10 836 SET_PCRE_CALLOUT(callout);
3390 nigel 63 first_callout = 1;
3391 ph10 654 last_callout_mark = NULL;
3392 nigel 63 callout_extra = 0;
3393     callout_count = 0;
3394     callout_fail_count = 999999;
3395     callout_fail_id = -1;
3396 nigel 73 show_malloc = 0;
3397 ph10 836 options = 0;
3398 nigel 63
3399 nigel 91 if (extra != NULL) extra->flags &=
3400     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3401    
3402     len = 0;
3403     for (;;)
3404 nigel 11 {
3405 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3406 nigel 91 {
3407 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
3408     {
3409 ph10 545 fprintf(outfile, "\n");
3410 ph10 537 break;
3411 ph10 545 }
3412 nigel 91 done = 1;
3413     goto CONTINUE;
3414     }
3415     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3416     len = (int)strlen((char *)buffer);
3417     if (buffer[len-1] == '\n') break;
3418 nigel 11 }
3419 nigel 3
3420     while (len > 0 && isspace(buffer[len-1])) len--;
3421     buffer[len] = 0;
3422     if (len == 0) break;
3423    
3424     p = buffer;
3425     while (isspace(*p)) p++;
3426    
3427 ph10 147 bptr = q = dbuffer;
3428 nigel 3 while ((c = *p++) != 0)
3429     {
3430     int i = 0;
3431     int n = 0;
3432 ph10 842
3433 ph10 836 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3434     In non-UTF mode, allow the value of the byte to fall through to later,
3435     where values greater than 127 are turned into UTF-8 when running in
3436     16-bit mode. */
3437 ph10 842
3438 ph10 836 if (c != '\\')
3439 nigel 3 {
3440 ph10 836 if (use_utf)
3441     {
3442     *q++ = c;
3443     continue;
3444 ph10 842 }
3445     }
3446    
3447 ph10 836 /* Handle backslash escapes */
3448 ph10 842
3449 ph10 836 else switch ((c = *p++))
3450     {
3451 nigel 3 case 'a': c = 7; break;
3452     case 'b': c = '\b'; break;
3453     case 'e': c = 27; break;
3454     case 'f': c = '\f'; break;
3455     case 'n': c = '\n'; break;
3456     case 'r': c = '\r'; break;
3457     case 't': c = '\t'; break;
3458     case 'v': c = '\v'; break;
3459    
3460     case '0': case '1': case '2': case '3':
3461     case '4': case '5': case '6': case '7':
3462     c -= '0';
3463     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3464     c = c * 8 + *p++ - '0';
3465     break;
3466    
3467     case 'x':
3468 nigel 49 if (*p == '{')
3469     {
3470 ph10 836 pcre_uint8 *pt = p;
3471 nigel 49 c = 0;
3472 ph10 738
3473 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3474     when isxdigit() is a macro that refers to its argument more than
3475     once. This is banned by the C Standard, but apparently happens in at
3476     least one MacOS environment. */
3477 ph10 738
3478 ph10 735 for (pt++; isxdigit(*pt); pt++)
3479 ph10 862 {
3480     if (++i == 9)
3481     fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3482     "using only the first eight.\n");
3483     else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3484     }
3485 nigel 49 if (*pt == '}')
3486     {
3487     p = pt + 1;
3488     break;
3489     }
3490 ph10 836 /* Not correct form for \x{...}; fall through */
3491 nigel 49 }
3492    
3493 ph10 842 /* \x without {} always defines just one byte in 8-bit mode. This
3494     allows UTF-8 characters to be constructed byte by byte, and also allows
3495     invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3496     Otherwise, pass it down to later code so that it can be turned into
3497 ph10 836 UTF-8 when running in 16-bit mode. */
3498 nigel 49
3499 nigel 3 c = 0;
3500     while (i++ < 2 && isxdigit(*p))
3501     {
3502 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3503 nigel 3 p++;
3504     }
3505 ph10 836 if (use_utf)
3506 ph10 842 {
3507 ph10 836 *q++ = c;
3508 ph10 842 continue;
3509     }
3510 nigel 3 break;
3511    
3512 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
3513 nigel 3 p--;
3514     continue;
3515    
3516 nigel 75 case '>':
3517 ph10 579 if (*p == '-')
3518 ph10 567 {
3519     start_offset_sign = -1;
3520     p++;
3521 ph10 579 }
3522 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3523 ph10 579 start_offset *= start_offset_sign;
3524 nigel 75 continue;
3525    
3526 nigel 3 case 'A': /* Option setting */
3527     options |= PCRE_ANCHORED;
3528     continue;
3529    
3530     case 'B':
3531     options |= PCRE_NOTBOL;
3532     continue;
3533    
3534 nigel 29 case 'C':
3535 nigel 63 if (isdigit(*p)) /* Set copy string */
3536     {
3537     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3538     copystrings |= 1 << n;
3539     }
3540     else if (isalnum(*p))
3541     {
3542 ph10 836 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3543 nigel 63 }
3544     else if (*p == '+')
3545     {
3546     callout_extra = 1;
3547     p++;
3548     }
3549     else if (*p == '-')
3550     {
3551 ph10 836 SET_PCRE_CALLOUT(NULL);
3552 nigel 63 p++;
3553     }
3554     else if (*p == '!')
3555     {
3556     callout_fail_id = 0;
3557     p++;
3558     while(isdigit(*p))
3559     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3560     callout_fail_count = 0;
3561     if (*p == '!')
3562     {
3563     p++;
3564     while(isdigit(*p))
3565     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3566     }
3567     }
3568     else if (*p == '*')
3569     {
3570     int sign = 1;
3571     callout_data = 0;
3572     if (*(++p) == '-') { sign = -1; p++; }
3573     while(isdigit(*p))
3574     callout_data = callout_data * 10 + *p++ - '0';
3575     callout_data *= sign;
3576     callout_data_set = 1;
3577     }
3578 nigel 29 continue;
3579    
3580 nigel 79 #if !defined NODFA
3581 nigel 77 case 'D':
3582 nigel 79 #if !defined NOPOSIX
3583 nigel 77 if (posix || do_posix)
3584     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3585     else
3586 nigel 79 #endif
3587 nigel 77 use_dfa = 1;
3588     continue;
3589 ph10 553 #endif
3590 nigel 77
3591 ph10 553 #if !defined NODFA
3592 nigel 77 case 'F':
3593     options |= PCRE_DFA_SHORTEST;
3594     continue;
3595 nigel 79 #endif
3596 nigel 77
3597 nigel 29 case 'G':
3598 nigel 63 if (isdigit(*p))
3599     {
3600     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3601     getstrings |= 1 << n;
3602     }
3603     else if (isalnum(*p))
3604     {
3605 ph10 836 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3606 nigel 63 }
3607 nigel 29 continue;
3608 ph10 691
3609 ph10 667 case 'J':
3610     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3611 ph10 691 if (extra != NULL
3612     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3613 ph10 667 && extra->executable_jit != NULL)
3614 ph10 691 {
3615 zherczeg 852 if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3616 ph10 836 jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3617     PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3618 ph10 691 }
3619 ph10 667 continue;
3620 nigel 29
3621     case 'L':
3622     getlist = 1;
3623     continue;
3624    
3625 nigel 63 case 'M':
3626     find_match_limit = 1;
3627     continue;
3628    
3629 nigel 37 case 'N':
3630 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
3631     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3632 ph10 461 else
3633 ph10 442 options |= PCRE_NOTEMPTY;
3634 nigel 37 continue;
3635    
3636 nigel 3 case 'O':
3637     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3638 nigel 53 if (n > size_offsets_max)
3639     {
3640     size_offsets_max = n;
3641 nigel 57 free(offsets);
3642 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3643 nigel 53 if (offsets == NULL)
3644     {
3645     printf("** Failed to get %d bytes of memory for offsets vector\n",
3646 ph10 151 (int)(size_offsets_max * sizeof(int)));
3647 nigel 77 yield = 1;
3648     goto EXIT;
3649 nigel 53 }
3650     }
3651     use_size_offsets = n;
3652 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3653 nigel 3 continue;
3654    
3655 nigel 75 case 'P':
3656 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3657 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3658 nigel 75 continue;
3659    
3660 nigel 91 case 'Q':
3661     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3662     if (extra == NULL)
3663     {
3664     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3665     extra->flags = 0;
3666     }
3667     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3668     extra->match_limit_recursion = n;
3669     continue;
3670    
3671     case 'q':
3672     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3673     if (extra == NULL)
3674     {
3675     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3676     extra->flags = 0;
3677     }
3678     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3679     extra->match_limit = n;
3680     continue;
3681    
3682 nigel 79 #if !defined NODFA
3683 nigel 77 case 'R':
3684     options |= PCRE_DFA_RESTART;
3685     continue;
3686 nigel 79 #endif
3687 nigel 77
3688 nigel 73 case 'S':
3689     show_malloc = 1;
3690     continue;
3691 ph10 392
3692 ph10 389 case 'Y':
3693     options |= PCRE_NO_START_OPTIMIZE;
3694 ph10 392 continue;
3695 nigel 73
3696 nigel 3 case 'Z':
3697     options |= PCRE_NOTEOL;
3698     continue;
3699 nigel 71
3700     case '?':
3701     options |= PCRE_NO_UTF8_CHECK;
3702     continue;
3703 nigel 91
3704     case '<':
3705     {
3706     int x = check_newline(p, outfile);
3707     if (x == 0) goto NEXT_DATA;
3708     options |= x;
3709     while (*p++ != '>');
3710     }
3711     continue;
3712 nigel 3 }
3713 ph10 836
3714 ph10 842 /* We now have a character value in c that may be greater than 255. In
3715     16-bit mode, we always convert characters to UTF-8 so that values greater
3716 ph10 836 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3717 ph10 842 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3718 ph10 836 mode must have come from \x{...} or octal constructs because values from
3719     \x.. get this far only in non-UTF mode. */
3720    
3721 ph10 842 #if !defined NOUTF || defined SUPPORT_PCRE16
3722 ph10 836 if (use_pcre16 || use_utf)
3723     {
3724     pcre_uint8 buff8[8];
3725     int ii, utn;
3726     utn = ord2utf8(c, buff8);
3727     for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3728     }
3729     else
3730 ph10 842 #endif
3731 ph10 836 {
3732     if (c > 255)
3733     {
3734     fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3735     "and UTF-8 mode is not enabled.\n", c);
3736     fprintf(outfile, "** Truncation will probably give the wrong "
3737     "result.\n");
3738     }
3739     *q++ = c;
3740     }
3741 nigel 3 }
3742 ph10 842
3743 ph10 836 /* Reached end of subject string */
3744 ph10 842
3745 nigel 9 *q = 0;
3746 ph10 530 len = (int)(q - dbuffer);
3747 ph10 545
3748 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
3749 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3750 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
3751 ph10 371
3752 ph10 363 #if !defined NOPOSIX
3753     if (posix || do_posix)
3754     {
3755     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3756 ph10 371 bptr += buffer_size - len - 1;
3757 ph10 363 }
3758 ph10 371 else
3759     #endif
3760 ph10 363 {
3761     memmove(bptr + buffer_size - len, bptr, len);
3762 ph10 371 bptr += buffer_size - len;
3763     }
3764 nigel 3
3765 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
3766     {
3767     printf("**Match limit not relevant for DFA matching: ignored\n");
3768     find_match_limit = 0;
3769     }
3770    
3771 nigel 3 /* Handle matching via the POSIX interface, which does not
3772 nigel 63 support timing or playing with the match limit or callout data. */
3773 nigel 3
3774 nigel 37 #if !defined NOPOSIX
3775 nigel 3 if (posix || do_posix)
3776     {
3777     int rc;
3778     int eflags = 0;
3779 nigel 63 regmatch_t *pmatch = NULL;
3780     if (use_size_offsets > 0)
3781 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3782 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3783     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3784 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3785 nigel 3
3786 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3787 nigel 3
3788     if (rc != 0)
3789     {
3790 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3791 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3792     }
3793 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3794     != 0)
3795     {
3796     fprintf(outfile, "Matched with REG_NOSUB\n");
3797     }
3798 nigel 3 else
3799     {
3800 nigel 7 size_t i;
3801 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
3802 nigel 3 {
3803     if (pmatch[i].rm_so >= 0)
3804     {
3805 nigel 23 fprintf(outfile, "%2d: ", (int)i);
3806 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_so,
3807 nigel 63 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3808 nigel 3 fprintf(outfile, "\n");
3809 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3810 nigel 35 {
3811 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
3812 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3813 nigel 63 outfile);
3814 nigel 35 fprintf(outfile, "\n");
3815     }
3816 nigel 3 }
3817     }
3818     }
3819 nigel 53 free(pmatch);
3820 ph10 836 goto NEXT_DATA;
3821 nigel 3 }
3822    
3823 ph10 836 #endif /* !defined NOPOSIX */
3824    
3825 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
3826 nigel 3
3827 ph10 836 #ifdef SUPPORT_PCRE16
3828     if (use_pcre16)
3829     {
3830 zherczeg 852 len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3831 ph10 836 switch(len)
3832     {
3833     case -1:
3834     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3835     "converted to UTF-16\n");
3836     goto NEXT_DATA;
3837 nigel 37
3838 ph10 836 case -2:
3839     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3840     "cannot be converted to UTF-16\n");
3841     goto NEXT_DATA;
3842    
3843     case -3:
3844     fprintf(outfile, "**Failed: character value greater than 0xffff "
3845     "cannot be converted to 16-bit in non-UTF mode\n");
3846 ph10 842 goto NEXT_DATA;
3847 ph10 836
3848     default:
3849     break;
3850     }
3851     bptr = (pcre_uint8 *)buffer16;
3852     }
3853     #endif
3854    
3855 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
3856 nigel 3 {
3857 ph10 512 markptr = NULL;
3858    
3859 nigel 93 if (timeitm > 0)
3860 nigel 3 {
3861     register int i;
3862     clock_t time_taken;
3863     clock_t start_time = clock();
3864 nigel 77
3865 nigel 79 #if !defined NODFA
3866 nigel 77 if (all_use_dfa || use_dfa)
3867     {
3868     int workspace[1000];
3869 nigel 93 for (i = 0; i < timeitm; i++)
3870 ph10 836 {
3871     PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3872     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3873     (sizeof(workspace)/sizeof(int)));
3874     }
3875 nigel 77 }
3876     else
3877 nigel 79 #endif
3878 nigel 77
3879 nigel 93 for (i = 0; i < timeitm; i++)
3880 ph10 836 {
3881     PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3882     (options | g_notempty), use_offsets, use_size_offsets);
3883     }
3884 nigel 3 time_taken = clock() - start_time;
3885 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
3886     (((double)time_taken * 1000.0) / (double)timeitm) /
3887 nigel 63 (double)CLOCKS_PER_SEC);
3888 nigel 3 }
3889    
3890 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
3891 nigel 87 varying limits in order to find the minimum value for the match limit and
3892 ph10 667 for the recursion limit. The match limits are relevant only to the normal
3893     running of pcre_exec(), so disable the JIT optimization. This makes it
3894     possible to run the same set of tests with and without JIT externally