/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 817 - (hide annotations) (download)
Thu Dec 22 07:03:34 2011 UTC (17 months ago) by zherczeg
File MIME type: text/plain
File size: 119984 byte(s)
More pcretest fixes for dual 8/16 bit mode
1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 813 been extended and consequently is now rather, er, *very* untidy in places. The
8 ph10 808 addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39    
40 ph10 200 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 200 #endif
43 ph10 199
44 nigel 3 #include <ctype.h>
45     #include <stdio.h>
46     #include <string.h>
47     #include <stdlib.h>
48     #include <time.h>
49 nigel 25 #include <locale.h>
50 nigel 75 #include <errno.h>
51 nigel 3
52 ph10 287 #ifdef SUPPORT_LIBREADLINE
53 ph10 343 #ifdef HAVE_UNISTD_H
54 ph10 287 #include <unistd.h>
55 ph10 343 #endif
56 ph10 287 #include <readline/readline.h>
57     #include <readline/history.h>
58     #endif
59 nigel 93
60 ph10 287
61 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
62     input and output without "b"; then I was told that "b" was needed in some
63     environments, so it was added for release 5.0 to both the input and output. (It
64     makes no difference on Unix-like systems.) Later I was told that it is wrong
65     for the input on Windows. I've now abstracted the modes into two macros that
66     are set here, to make it easier to fiddle with them, and removed "b" from the
67     input mode under Windows. */
68    
69     #if defined(_WIN32) || defined(WIN32)
70     #include <io.h> /* For _setmode() */
71     #include <fcntl.h> /* For _O_BINARY */
72     #define INPUT_MODE "r"
73     #define OUTPUT_MODE "wb"
74    
75 ph10 411 #ifndef isatty
76     #define isatty _isatty /* This is what Windows calls them, I'm told, */
77     #endif /* though in some environments they seem to */
78     /* be already defined, hence the #ifndefs. */
79     #ifndef fileno
80 ph10 343 #define fileno _fileno
81 ph10 411 #endif
82 ph10 343
83 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85     #ifdef __BORLANDC__
86     #define _setmode(handle, mode) setmode(handle, mode)
87     #endif
88    
89     /* Not Windows */
90    
91 nigel 93 #else
92     #include <sys/time.h> /* These two includes are needed */
93     #include <sys/resource.h> /* for setrlimit(). */
94     #define INPUT_MODE "rb"
95     #define OUTPUT_MODE "wb"
96 nigel 91 #endif
97    
98 nigel 93
99 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
100     displaying the results of pcre_study() and we also need to know about the
101     internal macros, structures, and other internal data values; pcretest has
102     "inside information" compared to a program that strictly follows the PCRE API.
103 nigel 37
104 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106     appropriately for an application, not for building PCRE. */
107 nigel 77
108 ph10 145 #include "pcre.h"
109 nigel 77 #include "pcre_internal.h"
110    
111 ph10 808 /* The pcre_printint() function, which prints the internal form of a compiled
112     regex, is held in a separate file so that (a) it can be compiled in either
113     8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 ph10 805 when that is compiled in debug mode. */
115    
116     #ifdef SUPPORT_PCRE8
117     void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118     #endif
119     #ifdef SUPPORT_PCRE16
120     void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121     #endif
122    
123 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
124     to keep two copies, we include the source file here, changing the names of the
125     external symbols to prevent clashes. */
126 nigel 77
127 ph10 351 #define _pcre_ucp_gentype ucp_gentype
128 ph10 667 #define _pcre_ucp_typerange ucp_typerange
129 nigel 85 #define _pcre_utf8_table1 utf8_table1
130     #define _pcre_utf8_table1_size utf8_table1_size
131     #define _pcre_utf8_table2 utf8_table2
132     #define _pcre_utf8_table3 utf8_table3
133     #define _pcre_utf8_table4 utf8_table4
134     #define _pcre_utt utt
135     #define _pcre_utt_size utt_size
136 ph10 240 #define _pcre_utt_names utt_names
137 nigel 85 #define _pcre_OP_lengths OP_lengths
138    
139     #include "pcre_tables.c"
140    
141 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
142 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
143 ph10 805 the same as in the printint.src file. We use it here in cases when the locale
144     has not been explicitly changed, so as to get consistent output from systems
145     that differ in their output from isprint() even in the "C" locale. */
146 nigel 93
147 ph10 805 #ifdef EBCDIC
148     #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149     #else
150     #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151     #endif
152    
153 ph10 808 #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154 nigel 85
155 nigel 37 /* It is possible to compile this test program without including support for
156     testing the POSIX interface, though this is not available via the standard
157     Makefile. */
158    
159     #if !defined NOPOSIX
160 nigel 3 #include "pcreposix.h"
161 nigel 37 #endif
162 nigel 3
163 ph10 808 /* It is also possible, originally for the benefit of a version that was
164     imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165     without the interface to the DFA matcher (NODFA), and without the doublecheck
166     of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167     out the UTF8 support if PCRE is built without it. */
168 nigel 79
169 ph10 107 #ifndef SUPPORT_UTF8
170     #ifndef NOUTF8
171     #define NOUTF8
172     #endif
173     #endif
174 nigel 79
175 ph10 808 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176     for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177     only from one place and is handled differently). I couldn't dream up any way of
178     using a single macro to do this in a generic way, because of the many different
179     argument requirements. We know that at least one of SUPPORT_PCRE8 and
180     SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181     use these in the definitions of generic macros. */
182 ph10 107
183 ph10 808 #ifdef SUPPORT_PCRE8
184 ph10 813
185 ph10 808 #define PCHARS8(lv, p, len, f) \
186     lv = pchars((pcre_uint8 *)p, len, f)
187    
188     #define PCHARSV8(p, len, f) \
189     (void)pchars((pcre_uint8 *)p, len, f)
190    
191 zherczeg 816 #define STRLEN8(p) ((int)strlen((char *)p))
192 ph10 815
193 ph10 808 #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
194     re = pcre_compile((char *)pat, options, error, erroffset, tables)
195    
/* 8-bit mode: call pcre_copy_named_substring() and store its return value in
rc. The substring name is taken from the namesptr argument, so the macro does
not rely on a variable with any particular name being in scope where it is
expanded. The subject and name pointers are cast to (char *) for the call. */

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)
200    
201     #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
202     rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
203    
204 ph10 814 #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
205     offsets, size_offsets, workspace, size_workspace) \
206     count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
207     offsets, size_offsets, workspace, size_workspace)
208    
209 ph10 808 #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
210     offsets, size_offsets) \
211     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
212     offsets, size_offsets)
213    
214 zherczeg 809 #define PCRE_FREE_STUDY8(extra) \
215     pcre_free_study(extra)
216 ph10 808
217 ph10 815 #define PCRE_FREE_SUBSTRING8(substring) \
218     pcre_free_substring(substring)
219    
220     #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
221     pcre_free_substring_list(listptr)
222    
223     #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
224     getnamesptr, subsptr) \
225     rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
226     (char *)getnamesptr, subsptr)
227    
/* 8-bit mode: call pcre_get_stringnumber() for the name at ptr and store the
result in n. The compiled pattern is the second macro argument (re); it is
passed through to the call rather than being picked up from whatever variable
called "re" happens to be in scope at the point of expansion. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)
230    
231     #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
232     rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
233    
234     #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
235     rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
236    
237 zherczeg 811 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
238     pcre_pattern_to_host_byte_order(re, extra, tables)
239    
240 ph10 813 #define PCRE_STUDY8(extra, re, options, error) \
241     extra = pcre_study(re, options, error)
242    
243 zherczeg 816 #define SET_PCRE_CALLOUT8(callout) \
244     pcre_callout = callout
245    
246 zherczeg 809 #endif /* SUPPORT_PCRE8 */
247    
248 ph10 815 /* -----------------------------------------------------------*/
249 zherczeg 809
250 ph10 808 #ifdef SUPPORT_PCRE16
251 ph10 813
252 ph10 808 #define PCHARS16(lv, p, len, f) \
253     lv = pchars16((PCRE_SPTR16)p, len, f)
254    
255     #define PCHARSV16(p, len, f) \
256     (void)pchars16((PCRE_SPTR16)p, len, f)
257    
258 zherczeg 816 #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
259 ph10 815
260 ph10 808 #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
261     re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
262    
/* 16-bit mode: call pcre16_copy_named_substring() and store its return value
in rc. The size argument is halved before being passed on (presumably because
callers give the buffer size in bytes while the 16-bit function counts 16-bit
units - confirm against the callers). The argument is parenthesised so that a
compound expression such as "a + b" is halved as a whole. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, (size)/2)
267    
/* 16-bit mode: call pcre16_copy_substring() for substring number i and store
its return value in rc. The size argument is halved before being passed on
(presumably a byte count converted to 16-bit units - confirm against the
callers); it is parenthesised so that a compound expression is halved as a
whole. */

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SCHAR16 *)cbuffer, (size)/2)
271    
272 ph10 814 #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
273     offsets, size_offsets, workspace, size_workspace) \
274     count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
275     options, offsets, size_offsets, workspace, size_workspace)
276 ph10 813
277 ph10 808 #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
278     offsets, size_offsets) \
279     count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
280     options, offsets, size_offsets)
281    
282 ph10 814 #define PCRE_FREE_STUDY16(extra) \
283     pcre16_free_study(extra)
284    
285 ph10 815 #define PCRE_FREE_SUBSTRING16(substring) \
286     pcre16_free_substring((PCRE_SPTR16)substring)
287    
288     #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
289     pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
290    
291     #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
292     getnamesptr, subsptr) \
293     rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
294     (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)subsptr)
295    
/* 16-bit mode: call pcre16_get_stringnumber() for the name at ptr and store
the result in n. The compiled pattern is the second macro argument (re); it is
passed through to the call rather than being picked up from whatever variable
called "re" happens to be in scope at the point of expansion. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
298    
299     #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
300     rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
301     (PCRE_SPTR16 *)subsptr)
302    
303     #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
304     rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
305     (PCRE_SPTR16 **)listptr)
306    
307 ph10 813 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
308     pcre16_pattern_to_host_byte_order(re, extra, tables)
309 ph10 810
310 ph10 808 #define PCRE_STUDY16(extra, re, options, error) \
311     extra = pcre16_study(re, options, error)
312    
313 zherczeg 816 #define SET_PCRE_CALLOUT16(callout) \
314     pcre16_callout = callout
315    
316 zherczeg 809 #endif /* SUPPORT_PCRE16 */
317    
318    
319 ph10 808 /* ----- Both modes are supported; a runtime test is needed ----- */
320    
321     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
322    
323 zherczeg 817 #define CHAR_SIZE (use_pcre16? 2:1)
324    
325 ph10 808 #define PCHARS(lv, p, len, f) \
326     if (use_pcre16) \
327     PCHARS16(lv, p, len, f); \
328     else \
329     PCHARS8(lv, p, len, f)
330    
331     #define PCHARSV(p, len, f) \
332     if (use_pcre16) \
333     PCHARSV16(p, len, f); \
334     else \
335     PCHARSV8(p, len, f)
336    
337 zherczeg 816 #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
338 ph10 815
339 ph10 808 #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
340     if (use_pcre16) \
341     PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
342     else \
343     PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
344    
345 ph10 815 #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
346     namesptr, cbuffer, size) \
347     if (use_pcre16) \
348     PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
349     namesptr, cbuffer, size); \
350     else \
351     PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
352     namesptr, cbuffer, size)
353    
354     #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
355     if (use_pcre16) \
356     PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
357     else \
358     PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
359    
360 ph10 814 #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
361     offsets, size_offsets, workspace, size_workspace) \
362 ph10 813 if (use_pcre16) \
363 ph10 814 PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
364     offsets, size_offsets, workspace, size_workspace); \
365 ph10 813 else \
366 ph10 814 PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
367     offsets, size_offsets, workspace, size_workspace)
368 ph10 813
369 ph10 808 #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
370     offsets, size_offsets) \
371     if (use_pcre16) \
372     PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
373     offsets, size_offsets); \
374     else \
375     PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
376     offsets, size_offsets)
377    
378 ph10 814 #define PCRE_FREE_STUDY(extra) \
379     if (use_pcre16) \
380     PCRE_FREE_STUDY16(extra); \
381     else \
382     PCRE_FREE_STUDY8(extra)
383    
384 ph10 815 #define PCRE_FREE_SUBSTRING(substring) \
385     if (use_pcre16) \
386     PCRE_FREE_SUBSTRING16(substring); \
387     else \
388     PCRE_FREE_SUBSTRING8(substring)
389    
390     #define PCRE_FREE_SUBSTRING_LIST(listptr) \
391     if (use_pcre16) \
392     PCRE_FREE_SUBSTRING_LIST16(listptr); \
393     else \
394     PCRE_FREE_SUBSTRING_LIST8(listptr)
395    
396     #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
397     getnamesptr, subsptr) \
398     if (use_pcre16) \
399     PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
400     getnamesptr, subsptr); \
401     else \
402     PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
403     getnamesptr, subsptr)
404    
405     #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
406     if (use_pcre16) \
407     PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
408     else \
409     PCRE_GET_STRINGNUMBER8(n, rc, ptr)
410    
411     #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
412     if (use_pcre16) \
413     PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
414     else \
415     PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
416    
417     #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
418     if (use_pcre16) \
419     PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
420     else \
421     PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
422    
423 ph10 813 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
424 ph10 810 if (use_pcre16) \
425 ph10 813 PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
426 ph10 810 else \
427 ph10 813 PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
428 ph10 810
429 ph10 808 #define PCRE_STUDY(extra, re, options, error) \
430     if (use_pcre16) \
431     PCRE_STUDY16(extra, re, options, error); \
432     else \
433     PCRE_STUDY8(extra, re, options, error)
434    
435 zherczeg 816 #define SET_PCRE_CALLOUT(callout) \
436     if (use_pcre16) \
437     SET_PCRE_CALLOUT16(callout); \
438     else \
439     SET_PCRE_CALLOUT8(callout)
440    
441 ph10 808 /* ----- Only 8-bit mode is supported ----- */
442    
443     #elif defined SUPPORT_PCRE8
444 zherczeg 817 #define CHAR_SIZE 1
445 ph10 815 #define PCHARS PCHARS8
446     #define PCHARSV PCHARSV8
447     #define STRLEN STRLEN8
448     #define PCRE_COMPILE PCRE_COMPILE8
449     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
450     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING8
451     #define PCRE_DFA_EXEC PCRE_DFA_EXEC8
452     #define PCRE_EXEC PCRE_EXEC8
453     #define PCRE_FREE_STUDY PCRE_FREE_STUDY8
454     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING8
455     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST8
456     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING8
457     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER8
458     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING8
459     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST8
460 ph10 813 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
461 ph10 815 #define PCRE_STUDY PCRE_STUDY8
462 zherczeg 816 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT8
463 ph10 808
464     /* ----- Only 16-bit mode is supported ----- */
465    
466     #else
/* Only the 16-bit library is available here, so a character unit is always
two bytes wide (the same value the dual-mode definition yields when 16-bit
mode is selected). */
#define CHAR_SIZE 2
468 ph10 815 #define PCHARS PCHARS16
469     #define PCHARSV PCHARSV16
470     #define STRLEN STRLEN16
471     #define PCRE_COMPILE PCRE_COMPILE16
472     #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
473     #define PCRE_COPY_SUBSTRING PCRE_COPY_SUBSTRING16
474     #define PCRE_DFA_EXEC PCRE_DFA_EXEC16
475     #define PCRE_EXEC PCRE_EXEC16
476     #define PCRE_FREE_STUDY PCRE_FREE_STUDY16
477     #define PCRE_FREE_SUBSTRING PCRE_FREE_SUBSTRING16
478     #define PCRE_FREE_SUBSTRING_LIST PCRE_FREE_SUBSTRING_LIST16
479     #define PCRE_GET_NAMED_SUBSTRING PCRE_GET_NAMED_SUBSTRING16
480     #define PCRE_GET_STRINGNUMBER PCRE_GET_STRINGNUMBER16
481     #define PCRE_GET_SUBSTRING PCRE_GET_SUBSTRING16
482     #define PCRE_GET_SUBSTRING_LIST PCRE_GET_SUBSTRING_LIST16
483 ph10 813 #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
484 ph10 815 #define PCRE_STUDY PCRE_STUDY16
485 zherczeg 816 #define SET_PCRE_CALLOUT SET_PCRE_CALLOUT16
486 ph10 808 #endif
487    
488     /* ----- End of mode-specific function call macros ----- */
489    
490    
491 nigel 85 /* Other parameters */
492    
493 nigel 3 #ifndef CLOCKS_PER_SEC
494     #ifdef CLK_TCK
495     #define CLOCKS_PER_SEC CLK_TCK
496     #else
497     #define CLOCKS_PER_SEC 100
498     #endif
499     #endif
500    
501 nigel 93 /* This is the default loop count for timing. */
502    
503 nigel 75 #define LOOPREPEAT 500000
504 nigel 3
505 nigel 85 /* Static variables */
506    
507 nigel 3 static FILE *outfile;
508     static int log_store = 0;
509 nigel 63 static int callout_count;
510     static int callout_extra;
511     static int callout_fail_count;
512     static int callout_fail_id;
513 ph10 210 static int debug_lengths;
514 nigel 63 static int first_callout;
515 nigel 93 static int locale_set = 0;
516 nigel 73 static int show_malloc;
517 ph10 810 static int use_utf;
518 nigel 43 static size_t gotten_store;
519 ph10 801 static size_t first_gotten_store = 0;
520 ph10 645 static const unsigned char *last_callout_mark = NULL;
521 nigel 3
522 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
523    
524     static int buffer_size = 50000;
525 ph10 756 static pcre_uint8 *buffer = NULL;
526     static pcre_uint8 *dbuffer = NULL;
527     static pcre_uint8 *pbuffer = NULL;
528 nigel 3
529 ph10 814 /* Another buffer is needed for translation to 16-bit character strings. It
530 ph10 813 will be obtained and extended as required. */
531    
532 ph10 805 #ifdef SUPPORT_PCRE16
533     static int buffer16_size = 0;
534     static pcre_uint16 *buffer16 = NULL;
535 ph10 813
536 ph10 814 /* We need the table of operator lengths that is used for 16-bit compiling, in
537 ph10 813 order to swap bytes in a pattern for saving/reloading testing. Luckily, the
538     data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
539     appropriately for the 16-bit world. Just as a safety check, make sure that
540     COMPILE_PCRE16 is *not* set. */
541    
542     #ifdef COMPILE_PCRE16
543     #error COMPILE_PCRE16 must not be set when compiling pcretest.c
544 ph10 805 #endif
545    
546 ph10 813 #if LINK_SIZE == 2
547     #undef LINK_SIZE
548     #define LINK_SIZE 1
549     #elif LINK_SIZE == 3 || LINK_SIZE == 4
550     #undef LINK_SIZE
551     #define LINK_SIZE 2
552     #else
553     #error LINK_SIZE must be either 2, 3, or 4
554     #endif
555    
556     static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
557    
558     #endif /* SUPPORT_PCRE16 */
559    
560 ph10 808 /* If we have 8-bit support, default use_pcre16 to false; if there is also
561     16-bit support, it can be changed by an option. If there is no 8-bit support,
562     there must be 16-bit support, so default it to 1. */
563    
564     #ifdef SUPPORT_PCRE8
565     static int use_pcre16 = 0;
566     #else
567     static int use_pcre16 = 1;
568     #endif
569    
570 ph10 598 /* Textual explanations for runtime error codes */
571 nigel 75
572 ph10 598 static const char *errtexts[] = {
573     NULL, /* 0 is no error */
574     NULL, /* NOMATCH is handled specially */
575     "NULL argument passed",
576     "bad option value",
577     "magic number missing",
578     "unknown opcode - pattern overwritten?",
579     "no more memory",
580 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
581 ph10 598 "match limit exceeded",
582     "callout error code",
583     NULL, /* BADUTF8 is handled specially */
584     "bad UTF-8 offset",
585     NULL, /* PARTIAL is handled specially */
586     "not used - internal error",
587     "internal error - pattern overwritten?",
588     "bad count value",
589     "item unsupported for DFA matching",
590     "backreference condition or recursion test not supported for DFA matching",
591     "match limit not supported for DFA matching",
592     "workspace size exceeded in DFA matching",
593 ph10 654 "too much recursion for DFA matching",
594 ph10 598 "recursion limit exceeded",
595     "not used - internal error",
596     "invalid combination of newline options",
597     "bad offset value",
598 ph10 642 NULL, /* SHORTUTF8 is handled specially */
599 ph10 676 "nested recursion at the same subject position",
600 ph10 805 "JIT stack limit reached",
601 ph10 808 "pattern compiled in wrong mode (8-bit/16-bit error)"
602 ph10 598 };
603    
604 ph10 654
605 ph10 541 /*************************************************
606     * Alternate character tables *
607     *************************************************/
608 nigel 49
609 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
610     using the default tables of the library. However, the T option can be used to
611     select alternate sets of tables, for different kinds of testing. Note also that
612 ph10 541 the L (locale) option also adjusts the tables. */
613    
614 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
615 ph10 541 only ASCII characters. */
616    
617 ph10 808 static const pcre_uint8 tables0[] = {
618 ph10 541
619     /* This table is a lower casing table. */
620    
621     0, 1, 2, 3, 4, 5, 6, 7,
622     8, 9, 10, 11, 12, 13, 14, 15,
623     16, 17, 18, 19, 20, 21, 22, 23,
624     24, 25, 26, 27, 28, 29, 30, 31,
625     32, 33, 34, 35, 36, 37, 38, 39,
626     40, 41, 42, 43, 44, 45, 46, 47,
627     48, 49, 50, 51, 52, 53, 54, 55,
628     56, 57, 58, 59, 60, 61, 62, 63,
629     64, 97, 98, 99,100,101,102,103,
630     104,105,106,107,108,109,110,111,
631     112,113,114,115,116,117,118,119,
632     120,121,122, 91, 92, 93, 94, 95,
633     96, 97, 98, 99,100,101,102,103,
634     104,105,106,107,108,109,110,111,
635     112,113,114,115,116,117,118,119,
636     120,121,122,123,124,125,126,127,
637     128,129,130,131,132,133,134,135,
638     136,137,138,139,140,141,142,143,
639     144,145,146,147,148,149,150,151,
640     152,153,154,155,156,157,158,159,
641     160,161,162,163,164,165,166,167,
642     168,169,170,171,172,173,174,175,
643     176,177,178,179,180,181,182,183,
644     184,185,186,187,188,189,190,191,
645     192,193,194,195,196,197,198,199,
646     200,201,202,203,204,205,206,207,
647     208,209,210,211,212,213,214,215,
648     216,217,218,219,220,221,222,223,
649     224,225,226,227,228,229,230,231,
650     232,233,234,235,236,237,238,239,
651     240,241,242,243,244,245,246,247,
652     248,249,250,251,252,253,254,255,
653    
654     /* This table is a case flipping table. */
655    
656     0, 1, 2, 3, 4, 5, 6, 7,
657     8, 9, 10, 11, 12, 13, 14, 15,
658     16, 17, 18, 19, 20, 21, 22, 23,
659     24, 25, 26, 27, 28, 29, 30, 31,
660     32, 33, 34, 35, 36, 37, 38, 39,
661     40, 41, 42, 43, 44, 45, 46, 47,
662     48, 49, 50, 51, 52, 53, 54, 55,
663     56, 57, 58, 59, 60, 61, 62, 63,
664     64, 97, 98, 99,100,101,102,103,
665     104,105,106,107,108,109,110,111,
666     112,113,114,115,116,117,118,119,
667     120,121,122, 91, 92, 93, 94, 95,
668     96, 65, 66, 67, 68, 69, 70, 71,
669     72, 73, 74, 75, 76, 77, 78, 79,
670     80, 81, 82, 83, 84, 85, 86, 87,
671     88, 89, 90,123,124,125,126,127,
672     128,129,130,131,132,133,134,135,
673     136,137,138,139,140,141,142,143,
674     144,145,146,147,148,149,150,151,
675     152,153,154,155,156,157,158,159,
676     160,161,162,163,164,165,166,167,
677     168,169,170,171,172,173,174,175,
678     176,177,178,179,180,181,182,183,
679     184,185,186,187,188,189,190,191,
680     192,193,194,195,196,197,198,199,
681     200,201,202,203,204,205,206,207,
682     208,209,210,211,212,213,214,215,
683     216,217,218,219,220,221,222,223,
684     224,225,226,227,228,229,230,231,
685     232,233,234,235,236,237,238,239,
686     240,241,242,243,244,245,246,247,
687     248,249,250,251,252,253,254,255,
688    
689     /* This table contains bit maps for various character classes. Each map is 32
690     bytes long and the bits run from the least significant end of each byte. The
691     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
692     graph, print, punct, and cntrl. Other classes are built from combinations. */
693    
694     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
695     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
696     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
697     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
698    
699     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
700     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
701     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
702     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
703    
704     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
705     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
706     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
707     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
708    
709     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
710     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
711     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
712     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
713    
714     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
715     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
716     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
717     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
718    
719     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
720     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
721     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
722     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
723    
724     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
725     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
726     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
727     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
728    
729     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
730     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
731     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
732     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
733    
734     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
735     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
736     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
737     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
738    
739     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
740     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
741     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
742     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
743    
744     /* This table identifies various classes of character by individual bits:
745     0x01 white space character
746     0x02 letter
747     0x04 decimal digit
748     0x08 hexadecimal digit
749     0x10 alphanumeric or '_'
750     0x80 regular expression metacharacter or binary zero
751     */
752    
753     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
754     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
755     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
756     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
757     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
758     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
759     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
760     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
761     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
762     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
763     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
764     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
765     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
766     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
767     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
768     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
769     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
770     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
771     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
772     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
773     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
774     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
775     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
776     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
777     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
778     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
779     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
780     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
781     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
782     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
783     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
784     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
785    
786 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
787     be at least an approximation of ISO 8859. In particular, there are characters
788 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
789    
790 ph10 808 static const pcre_uint8 tables1[] = {
791 ph10 541 0,1,2,3,4,5,6,7,
792     8,9,10,11,12,13,14,15,
793     16,17,18,19,20,21,22,23,
794     24,25,26,27,28,29,30,31,
795     32,33,34,35,36,37,38,39,
796     40,41,42,43,44,45,46,47,
797     48,49,50,51,52,53,54,55,
798     56,57,58,59,60,61,62,63,
799     64,97,98,99,100,101,102,103,
800     104,105,106,107,108,109,110,111,
801     112,113,114,115,116,117,118,119,
802     120,121,122,91,92,93,94,95,
803     96,97,98,99,100,101,102,103,
804     104,105,106,107,108,109,110,111,
805     112,113,114,115,116,117,118,119,
806     120,121,122,123,124,125,126,127,
807     128,129,130,131,132,133,134,135,
808     136,137,138,139,140,141,142,143,
809     144,145,146,147,148,149,150,151,
810     152,153,154,155,156,157,158,159,
811     160,161,162,163,164,165,166,167,
812     168,169,170,171,172,173,174,175,
813     176,177,178,179,180,181,182,183,
814     184,185,186,187,188,189,190,191,
815     224,225,226,227,228,229,230,231,
816     232,233,234,235,236,237,238,239,
817     240,241,242,243,244,245,246,215,
818     248,249,250,251,252,253,254,223,
819     224,225,226,227,228,229,230,231,
820     232,233,234,235,236,237,238,239,
821     240,241,242,243,244,245,246,247,
822     248,249,250,251,252,253,254,255,
823     0,1,2,3,4,5,6,7,
824     8,9,10,11,12,13,14,15,
825     16,17,18,19,20,21,22,23,
826     24,25,26,27,28,29,30,31,
827     32,33,34,35,36,37,38,39,
828     40,41,42,43,44,45,46,47,
829     48,49,50,51,52,53,54,55,
830     56,57,58,59,60,61,62,63,
831     64,97,98,99,100,101,102,103,
832     104,105,106,107,108,109,110,111,
833     112,113,114,115,116,117,118,119,
834     120,121,122,91,92,93,94,95,
835     96,65,66,67,68,69,70,71,
836     72,73,74,75,76,77,78,79,
837     80,81,82,83,84,85,86,87,
838     88,89,90,123,124,125,126,127,
839     128,129,130,131,132,133,134,135,
840     136,137,138,139,140,141,142,143,
841     144,145,146,147,148,149,150,151,
842     152,153,154,155,156,157,158,159,
843     160,161,162,163,164,165,166,167,
844     168,169,170,171,172,173,174,175,
845     176,177,178,179,180,181,182,183,
846     184,185,186,187,188,189,190,191,
847     224,225,226,227,228,229,230,231,
848     232,233,234,235,236,237,238,239,
849     240,241,242,243,244,245,246,215,
850     248,249,250,251,252,253,254,223,
851     192,193,194,195,196,197,198,199,
852     200,201,202,203,204,205,206,207,
853     208,209,210,211,212,213,214,247,
854     216,217,218,219,220,221,222,255,
855     0,62,0,0,1,0,0,0,
856     0,0,0,0,0,0,0,0,
857     32,0,0,0,1,0,0,0,
858     0,0,0,0,0,0,0,0,
859     0,0,0,0,0,0,255,3,
860     126,0,0,0,126,0,0,0,
861     0,0,0,0,0,0,0,0,
862     0,0,0,0,0,0,0,0,
863     0,0,0,0,0,0,255,3,
864     0,0,0,0,0,0,0,0,
865     0,0,0,0,0,0,12,2,
866     0,0,0,0,0,0,0,0,
867     0,0,0,0,0,0,0,0,
868     254,255,255,7,0,0,0,0,
869     0,0,0,0,0,0,0,0,
870     255,255,127,127,0,0,0,0,
871     0,0,0,0,0,0,0,0,
872     0,0,0,0,254,255,255,7,
873     0,0,0,0,0,4,32,4,
874     0,0,0,128,255,255,127,255,
875     0,0,0,0,0,0,255,3,
876     254,255,255,135,254,255,255,7,
877     0,0,0,0,0,4,44,6,
878     255,255,127,255,255,255,127,255,
879     0,0,0,0,254,255,255,255,
880     255,255,255,255,255,255,255,127,
881     0,0,0,0,254,255,255,255,
882     255,255,255,255,255,255,255,255,
883     0,2,0,0,255,255,255,255,
884     255,255,255,255,255,255,255,127,
885     0,0,0,0,255,255,255,255,
886     255,255,255,255,255,255,255,255,
887     0,0,0,0,254,255,0,252,
888     1,0,0,248,1,0,0,120,
889     0,0,0,0,254,255,255,255,
890     0,0,128,0,0,0,128,0,
891     255,255,255,255,0,0,0,0,
892     0,0,0,0,0,0,0,128,
893     255,255,255,255,0,0,0,0,
894     0,0,0,0,0,0,0,0,
895     128,0,0,0,0,0,0,0,
896     0,1,1,0,1,1,0,0,
897     0,0,0,0,0,0,0,0,
898     0,0,0,0,0,0,0,0,
899     1,0,0,0,128,0,0,0,
900     128,128,128,128,0,0,128,0,
901     28,28,28,28,28,28,28,28,
902     28,28,0,0,0,0,0,128,
903     0,26,26,26,26,26,26,18,
904     18,18,18,18,18,18,18,18,
905     18,18,18,18,18,18,18,18,
906     18,18,18,128,128,0,128,16,
907     0,26,26,26,26,26,26,18,
908     18,18,18,18,18,18,18,18,
909     18,18,18,18,18,18,18,18,
910     18,18,18,128,128,0,0,0,
911     0,0,0,0,0,1,0,0,
912     0,0,0,0,0,0,0,0,
913     0,0,0,0,0,0,0,0,
914     0,0,0,0,0,0,0,0,
915     1,0,0,0,0,0,0,0,
916     0,0,18,0,0,0,0,0,
917     0,0,20,20,0,18,0,0,
918     0,20,18,0,0,0,0,0,
919     18,18,18,18,18,18,18,18,
920     18,18,18,18,18,18,18,18,
921     18,18,18,18,18,18,18,0,
922     18,18,18,18,18,18,18,18,
923     18,18,18,18,18,18,18,18,
924     18,18,18,18,18,18,18,18,
925     18,18,18,18,18,18,18,0,
926     18,18,18,18,18,18,18,18
927     };
928    
929    
930    
931 ph10 558
932     #ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A few elderly systems (SunOS4 is one example) ship a C library that lacks
strerror(). This stand-in looks the message up in the sys_errlist[] table,
using sys_nerr as the upper bound for a valid index.

Argument:
  n         the error number

Returns:    the matching entry of sys_errlist[], or a fixed "unknown" text
            when n is negative or not below sys_nerr
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
950     #endif /* HAVE_STRERROR */
951    
952    
953 ph10 667 /*************************************************
954     * JIT memory callback *
955     *************************************************/
956 ph10 558
957 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
958     {
959     return (pcre_jit_stack *)arg;
960     }
961 ph10 558
962 ph10 667
963 ph10 808 /*************************************************
964     * Convert UTF-8 string to value *
965     *************************************************/
966    
967     /* This function takes one or more bytes that represents a UTF-8 character,
968     and returns the value of the character.
969    
970     Argument:
971     utf8bytes a pointer to the byte vector
972     vptr a pointer to an int to receive the value
973    
974     Returns: > 0 => the number of bytes consumed
975     -6 to 0 => malformed UTF-8 character at offset = (-return)
976     */
977    
978     #if !defined NOUTF8
979    
980     static int
981     utf82ord(pcre_uint8 *utf8bytes, int *vptr)
982     {
983     int c = *utf8bytes++;
984     int d = c;
985     int i, j, s;
986    
987     for (i = -1; i < 6; i++) /* i is number of additional bytes */
988     {
989     if ((d & 0x80) == 0) break;
990     d <<= 1;
991     }
992    
993     if (i == -1) { *vptr = c; return 1; } /* ascii character */
994     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
995    
996     /* i now has a value in the range 1-5 */
997    
998     s = 6*i;
999     d = (c & utf8_table3[i]) << s;
1000    
1001     for (j = 0; j < i; j++)
1002     {
1003     c = *utf8bytes++;
1004     if ((c & 0xc0) != 0x80) return -(j+1);
1005     s -= 6;
1006     d |= (c & 0x3f) << s;
1007     }
1008    
1009     /* Check that encoding was the correct unique one */
1010    
1011     for (j = 0; j < utf8_table1_size; j++)
1012     if (d <= utf8_table1[j]) break;
1013     if (j != i) return -(i+1);
1014    
1015     /* Valid value */
1016    
1017     *vptr = d;
1018     return i+1;
1019     }
1020    
1021     #endif
1022    
1023    
1024    
1025     /*************************************************
1026     * Convert character value to UTF-8 *
1027     *************************************************/
1028    
1029     /* This function takes an integer value in the range 0 - 0x7fffffff
1030     and encodes it as a UTF-8 character in 0 to 6 bytes.
1031    
1032     Arguments:
1033     cvalue the character value
1034     utf8bytes pointer to buffer for result - at least 6 bytes long
1035    
1036     Returns: number of characters placed in the buffer
1037     */
1038    
1039     #if !defined NOUTF8
1040    
1041     static int
1042     ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1043     {
1044     register int i, j;
1045     for (i = 0; i < utf8_table1_size; i++)
1046     if (cvalue <= utf8_table1[i]) break;
1047     utf8bytes += i;
1048     for (j = i; j > 0; j--)
1049     {
1050     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1051     cvalue >>= 6;
1052     }
1053     *utf8bytes = utf8_table2[i] | cvalue;
1054     return i + 1;
1055     }
1056    
1057     #endif
1058    
1059    
1060    
1061 ph10 805 #ifdef SUPPORT_PCRE16
1062 ph10 558 /*************************************************
1063 ph10 805 * Convert a string to 16-bit *
1064     *************************************************/
1065    
1066 ph10 808 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1067     8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1068     double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1069     in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1070 ph10 813 result is always left in buffer16.
1071 ph10 805
1072 ph10 810 Arguments:
1073     p points to a byte string
1074     utf true if UTF-8 (to be converted to UTF-16)
1075     len number of bytes in the string (excluding trailing zero)
1076 ph10 813
1077 ph10 810 Returns: number of 16-bit data items used (excluding trailing zero)
1078 ph10 813 OR -1 if a UTF-8 string is malformed
1079 ph10 810 */
1080    
1081 ph10 805 static int
1082 ph10 808 to16(pcre_uint8 *p, int utf, int len)
1083 ph10 805 {
1084     pcre_uint16 *pp;
1085    
1086 ph10 808 if (buffer16_size < 2*len + 2)
1087 ph10 805 {
1088     if (buffer16 != NULL) free(buffer16);
1089 ph10 808 buffer16_size = 2*len + 2;
1090 ph10 805 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1091 ph10 808 if (buffer16 == NULL)
1092 ph10 805 {
1093     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1094     exit(1);
1095     }
1096     }
1097 ph10 808
1098 ph10 805 pp = buffer16;
1099    
1100     if (!utf)
1101     {
1102 ph10 808 while (len-- > 0) *pp++ = *p++;
1103 ph10 805 }
1104 ph10 808
1105 ph10 805 else
1106     {
1107 ph10 808 int c;
1108     while (len > 0)
1109     {
1110     int chlen = utf82ord(p, &c);
1111 ph10 810 if (chlen <= 0) return -1;
1112 ph10 808 p += chlen;
1113 ph10 813 len -= chlen;
1114 ph10 808 if (c < 0x10000) *pp++ = c; else
1115     {
1116     c -= 0x10000;
1117     *pp++ = 0xD800 | (c >> 10);
1118     *pp++ = 0xDC00 | (c & 0x3ff);
1119     }
1120     }
1121     }
1122    
1123     *pp = 0;
1124 ph10 805 return pp - buffer16;
1125 ph10 808 }
1126 ph10 805 #endif
1127    
1128    
1129     /*************************************************
1130 nigel 91 * Read or extend an input line *
1131     *************************************************/
1132    
1133     /* Input lines are read into buffer, but both patterns and data lines can be
1134     continued over multiple input lines. In addition, if the buffer fills up, we
1135     want to automatically expand it so as to be able to handle extremely large
1136     lines that are needed for certain stress tests. When the input buffer is
1137     expanded, the other two buffers must also be expanded likewise, and the
1138     contents of pbuffer, which are a copy of the input for callouts, must be
1139     preserved (for when expansion happens for a data line). This is not the most
1140     optimal way of handling this, but hey, this is just a test program!
1141    
1142     Arguments:
1143     f the file to read
1144     start where in buffer to start (this *must* be within buffer)
1145 ph10 287 prompt for stdin or readline()
1146 nigel 91
1147     Returns: pointer to the start of new data
1148     could be a copy of start, or could be moved
1149     NULL if no data read and EOF reached
1150     */
1151    
1152 ph10 756 static pcre_uint8 *
1153     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1154 nigel 91 {
1155 ph10 756 pcre_uint8 *here = start;
1156 nigel 91
1157     for (;;)
1158     {
1159 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
1160 nigel 93
1161 nigel 91 if (rlen > 1000)
1162     {
1163     int dlen;
1164 ph10 289
1165 ph10 287 /* If libreadline support is required, use readline() to read a line if the
1166     input is a terminal. Note that readline() removes the trailing newline, so
1167     we must put it back again, to be compatible with fgets(). */
1168 ph10 289
1169 ph10 287 #ifdef SUPPORT_LIBREADLINE
1170     if (isatty(fileno(f)))
1171     {
1172 ph10 289 size_t len;
1173 ph10 287 char *s = readline(prompt);
1174     if (s == NULL) return (here == start)? NULL : start;
1175     len = strlen(s);
1176 ph10 289 if (len > 0) add_history(s);
1177 ph10 287 if (len > rlen - 1) len = rlen - 1;
1178     memcpy(here, s, len);
1179     here[len] = '\n';
1180 ph10 289 here[len+1] = 0;
1181     free(s);
1182 ph10 287 }
1183 ph10 289 else
1184     #endif
1185    
1186 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1187 ph10 289
1188 ph10 287 {
1189 ph10 516 if (f == stdin) printf("%s", prompt);
1190 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1191     return (here == start)? NULL : start;
1192 ph10 289 }
1193    
1194 nigel 91 dlen = (int)strlen((char *)here);
1195     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1196     here += dlen;
1197     }
1198    
1199     else
1200     {
1201     int new_buffer_size = 2*buffer_size;
1202 ph10 808 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1203     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1204     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1205 nigel 91
1206     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1207     {
1208     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1209     exit(1);
1210     }
1211    
1212     memcpy(new_buffer, buffer, buffer_size);
1213     memcpy(new_pbuffer, pbuffer, buffer_size);
1214    
1215     buffer_size = new_buffer_size;
1216    
1217     start = new_buffer + (start - buffer);
1218     here = new_buffer + (here - buffer);
1219    
1220     free(buffer);
1221     free(dbuffer);
1222     free(pbuffer);
1223    
1224     buffer = new_buffer;
1225     dbuffer = new_dbuffer;
1226     pbuffer = new_pbuffer;
1227     }
1228     }
1229    
1230     return NULL; /* Control never gets here */
1231     }
1232    
1233    
1234    
1235     /*************************************************
1236 nigel 63 * Read number from string *
1237     *************************************************/
1238    
1239     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1240     around with conditional compilation, just do the job by hand. It is only used
1241 nigel 93 for unpicking arguments, so just keep it simple.
1242 nigel 63
1243     Arguments:
1244     str string to be converted
1245     endptr where to put the end pointer
1246    
1247     Returns: the unsigned long
1248     */
1249    
1250     static int
1251 ph10 808 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1252 nigel 63 {
1253     int result = 0;
1254     while(*str != 0 && isspace(*str)) str++;
1255     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1256     *endptr = str;
1257     return(result);
1258     }
1259    
1260    
1261    
1262 ph10 810 /*************************************************
1263     * Print one character *
1264     *************************************************/
1265    
1266     /* Print a single character either literally, or as a hex escape. */
1267    
1268     static int pchar(int c, FILE *f)
1269     {
1270     if (PRINTOK(c))
1271     {
1272     if (f != NULL) fprintf(f, "%c", c);
1273     return 1;
1274     }
1275 ph10 813
1276 ph10 810 if (c < 0x100)
1277     {
1278     if (use_utf)
1279 ph10 813 {
1280 ph10 810 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1281     return 6;
1282 ph10 813 }
1283     else
1284 ph10 810 {
1285     if (f != NULL) fprintf(f, "\\x%02x", c);
1286 ph10 813 return 4;
1287     }
1288 ph10 810 }
1289 ph10 813
1290 ph10 810 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1291     return (c <= 0x000000ff)? 6 :
1292     (c <= 0x00000fff)? 7 :
1293     (c <= 0x0000ffff)? 8 :
1294     (c <= 0x000fffff)? 9 : 10;
1295     }
1296    
1297    
1298    
1299 ph10 808 #ifdef SUPPORT_PCRE8
1300 nigel 49 /*************************************************
1301 ph10 808 * Print 8-bit character string *
1302 nigel 49 *************************************************/
1303    
1304 ph10 808 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1305     If handed a NULL file, just counts chars without printing. */
1306 nigel 49
1307 ph10 808 static int pchars(pcre_uint8 *p, int length, FILE *f)
1308 nigel 49 {
1309 nigel 85 int c = 0;
1310 nigel 63 int yield = 0;
1311 nigel 3
1312 zherczeg 817 if (length < 0)
1313     length = strlen((char *)p);
1314    
1315 nigel 63 while (length-- > 0)
1316 nigel 3 {
1317 nigel 79 #if !defined NOUTF8
1318 ph10 810 if (use_utf)
1319 nigel 63 {
1320     int rc = utf82ord(p, &c);
1321     if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1322     {
1323     length -= rc - 1;
1324     p += rc;
1325 ph10 810 yield += pchar(c, f);
1326 ph10 813 continue;
1327 nigel 63 }
1328     }
1329 nigel 79 #endif
1330 nigel 93 c = *p++;
1331 ph10 810 yield += pchar(c, f);
1332 nigel 63 }
1333 nigel 3
1334 nigel 63 return yield;
1335     }
1336 ph10 808 #endif
1337 nigel 23
1338 nigel 3
1339 nigel 23
1340 ph10 808 #ifdef SUPPORT_PCRE16
1341 nigel 63 /*************************************************
1342 ph10 815 * Find length of 0-terminated 16-bit string *
1343     *************************************************/
1344    
1345     static int strlen16(PCRE_SPTR16 p)
1346     {
1347     int len = 0;
1348     while (*p++ != 0) len++;
1349     return len;
1350     }
1351    
1352    
1353    
1354     /*************************************************
1355 ph10 808 * Print 16-bit character string *
1356     *************************************************/
1357    
1358     /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1359     If handed a NULL file, just counts chars without printing. */
1360    
1361     static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1362     {
1363     int yield = 0;
1364    
1365 zherczeg 817 if (length < 0)
1366     length = strlen16(p);
1367    
1368 ph10 808 while (length-- > 0)
1369     {
1370     int c = *p++ & 0xffff;
1371     #if !defined NOUTF8
1372 ph10 810 if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1373 ph10 808 {
1374     int d = *p & 0xffff;
1375     if (d >= 0xDC00 && d < 0xDFFF)
1376     {
1377     c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1378     length--;
1379 ph10 813 p++;
1380 ph10 808 }
1381 ph10 813 }
1382 ph10 808 #endif
1383 ph10 810 yield += pchar(c, f);
1384 ph10 808 }
1385    
1386     return yield;
1387     }
1388     #endif
1389    
1390    
1391    
1392     /*************************************************
1393 nigel 63 * Callout function *
1394     *************************************************/
1395 nigel 3
1396 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1397     the match. Yield zero unless more callouts than the fail count, or the callout
1398     data is not zero. */
1399 nigel 3
1400 nigel 63 static int callout(pcre_callout_block *cb)
1401     {
1402     FILE *f = (first_callout | callout_extra)? outfile : NULL;
1403 nigel 75 int i, pre_start, post_start, subject_length;
1404 nigel 3
1405 nigel 63 if (callout_extra)
1406     {
1407     fprintf(f, "Callout %d: last capture = %d\n",
1408     cb->callout_number, cb->capture_last);
1409 nigel 3
1410 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
1411     {
1412     if (cb->offset_vector[i] < 0)
1413     fprintf(f, "%2d: <unset>\n", i/2);
1414     else
1415     {
1416     fprintf(f, "%2d: ", i/2);
1417 ph10 808 PCHARSV(cb->subject + cb->offset_vector[i],
1418 nigel 63 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1419     fprintf(f, "\n");
1420     }
1421     }
1422     }
1423 nigel 3
1424 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
1425     datails. On subsequent calls in the same match, we use pchars just to find the
1426     printed lengths of the substrings. */
1427 nigel 3
1428 nigel 63 if (f != NULL) fprintf(f, "--->");
1429 nigel 3
1430 ph10 808 PCHARS(pre_start, cb->subject, cb->start_match, f);
1431     PCHARS(post_start, cb->subject + cb->start_match,
1432 nigel 63 cb->current_position - cb->start_match, f);
1433 nigel 3
1434 ph10 808 PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1435 nigel 75
1436 ph10 808 PCHARSV(cb->subject + cb->current_position,
1437 nigel 63 cb->subject_length - cb->current_position, f);
1438 nigel 3
1439 nigel 63 if (f != NULL) fprintf(f, "\n");
1440 nigel 9
1441 nigel 63 /* Always print appropriate indicators, with callout number if not already
1442 nigel 75 shown. For automatic callouts, show the pattern offset. */
1443 nigel 3
1444 nigel 75 if (cb->callout_number == 255)
1445     {
1446     fprintf(outfile, "%+3d ", cb->pattern_position);
1447     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1448     }
1449     else
1450     {
1451     if (callout_extra) fprintf(outfile, " ");
1452     else fprintf(outfile, "%3d ", cb->callout_number);
1453     }
1454 nigel 3
1455 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1456     fprintf(outfile, "^");
1457 nigel 3
1458 nigel 63 if (post_start > 0)
1459     {
1460     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1461     fprintf(outfile, "^");
1462 nigel 3 }
1463    
1464 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1465     fprintf(outfile, " ");
1466    
1467     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1468     pbuffer + cb->pattern_position);
1469    
1470 nigel 63 fprintf(outfile, "\n");
1471     first_callout = 0;
1472 nigel 3
1473 ph10 654 if (cb->mark != last_callout_mark)
1474 ph10 645 {
1475 ph10 654 fprintf(outfile, "Latest Mark: %s\n",
1476 ph10 645 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1477 ph10 654 last_callout_mark = cb->mark;
1478     }
1479 ph10 645
1480 nigel 71 if (cb->callout_data != NULL)
1481 nigel 49 {
1482 nigel 71 int callout_data = *((int *)(cb->callout_data));
1483     if (callout_data != 0)
1484     {
1485     fprintf(outfile, "Callout data = %d\n", callout_data);
1486     return callout_data;
1487     }
1488 nigel 63 }
1489 nigel 49
1490 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
1491     (++callout_count >= callout_fail_count)? 1 : 0;
1492 nigel 3 }
1493    
1494    
1495 nigel 63 /*************************************************
1496 nigel 73 * Local malloc functions *
1497 nigel 63 *************************************************/
1498 nigel 3
1499 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1500 ph10 801 compiled re, which is the first store request that pcre_compile() makes. The
1501     show_malloc variable is set only during matching. */
1502 nigel 3
1503     static void *new_malloc(size_t size)
1504     {
1505 nigel 73 void *block = malloc(size);
1506 nigel 43 gotten_store = size;
1507 ph10 801 if (first_gotten_store == 0) first_gotten_store = size;
1508 nigel 73 if (show_malloc)
1509 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1510 nigel 73 return block;
1511 nigel 3 }
1512    
1513 nigel 73 static void new_free(void *block)
1514     {
1515     if (show_malloc)
1516     fprintf(outfile, "free %p\n", block);
1517     free(block);
1518     }
1519 nigel 3
1520 nigel 73 /* For recursion malloc/free, to test stacking calls */
1521    
1522     static void *stack_malloc(size_t size)
1523     {
1524     void *block = malloc(size);
1525     if (show_malloc)
1526 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1527 nigel 73 return block;
1528     }
1529    
1530     static void stack_free(void *block)
1531     {
1532     if (show_malloc)
1533     fprintf(outfile, "stack_free %p\n", block);
1534     free(block);
1535     }
1536    
1537    
1538 nigel 63 /*************************************************
1539     * Call pcre_fullinfo() *
1540     *************************************************/
1541 nigel 43
1542 ph10 808 /* Get one piece of information from the pcre_fullinfo() function. When only
1543     one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1544     value, but the code is defensive. */
1545 nigel 43
1546     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1547     {
1548     int rc;
1549 ph10 808
1550     if (use_pcre16)
1551     #ifdef SUPPORT_PCRE16
1552     rc = pcre16_fullinfo(re, study, option, ptr);
1553     #else
1554     rc = PCRE_ERROR_BADMODE;
1555     #endif
1556     else
1557     #ifdef SUPPORT_PCRE8
1558     rc = pcre_fullinfo(re, study, option, ptr);
1559     #else
1560     rc = PCRE_ERROR_BADMODE;
1561     #endif
1562    
1563     if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1564     use_pcre16? "16" : "", option);
1565 nigel 43 }
1566    
1567    
1568    
1569 nigel 63 /*************************************************
1570 ph10 813 * Swap byte functions *
1571 nigel 75 *************************************************/
1572    
1573 ph10 813 /* The following functions swap the bytes of a pcre_uint16
1574     and pcre_uint32 value.
1575    
1576     Arguments:
1577     value any number
1578    
1579     Returns: the byte swapped value
1580     */
1581    
1582     static pcre_uint32
1583     swap_uint32(pcre_uint32 value)
1584 nigel 75 {
1585     return ((value & 0x000000ff) << 24) |
1586     ((value & 0x0000ff00) << 8) |
1587     ((value & 0x00ff0000) >> 8) |
1588 ph10 813 (value >> 24);
1589 nigel 75 }
1590    
1591 ph10 813 static pcre_uint16
1592     swap_uint16(pcre_uint16 value)
1593     {
1594     return (value >> 8) | (value << 8);
1595     }
1596 nigel 75
1597    
1598    
1599     /*************************************************
1600 ph10 813 * Flip bytes in a compiled pattern *
1601     *************************************************/
1602    
1603 ph10 814 /* This function is called if the 'F' option was present on a pattern that is
1604 ph10 813 to be written to a file. We flip the bytes of all the integer fields in the
1605     regex data block and the study block. In 16-bit mode this also flips relevant
1606     bytes in the pattern itself. This is to make it possible to test PCRE's
1607     ability to reload byte-flipped patterns, e.g. those compiled on a different
1608     architecture. */
1609    
1610     static void
1611     regexflip(pcre *ere, pcre_extra *extra)
1612     {
1613     real_pcre *re = (real_pcre *)ere;
1614     int op;
1615    
1616     #ifdef SUPPORT_PCRE16
1617     pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1618     int length = re->name_count * re->name_entry_size;
1619     #ifdef SUPPORT_UTF
1620     BOOL utf = (re->options & PCRE_UTF16) != 0;
1621     BOOL utf16_char = FALSE;
1622     #endif /* SUPPORT_UTF */
1623     #endif /* SUPPORT_PCRE16 */
1624    
1625     /* Always flip the bytes in the main data block and study blocks. */
1626    
1627     re->magic_number = REVERSED_MAGIC_NUMBER;
1628     re->size = swap_uint32(re->size);
1629     re->options = swap_uint32(re->options);
1630     re->flags = swap_uint16(re->flags);
1631     re->top_bracket = swap_uint16(re->top_bracket);
1632     re->top_backref = swap_uint16(re->top_backref);
1633     re->first_char = swap_uint16(re->first_char);
1634     re->req_char = swap_uint16(re->req_char);
1635     re->name_table_offset = swap_uint16(re->name_table_offset);
1636     re->name_entry_size = swap_uint16(re->name_entry_size);
1637     re->name_count = swap_uint16(re->name_count);
1638    
1639     if (extra != NULL)
1640     {
1641     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1642     rsd->size = swap_uint32(rsd->size);
1643     rsd->flags = swap_uint32(rsd->flags);
1644     rsd->minlength = swap_uint32(rsd->minlength);
1645     }
1646    
1647 ph10 814 /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1648     in the name table, if present, and then in the pattern itself. */
1649    
1650 ph10 813 #ifdef SUPPORT_PCRE16
1651     if (!use_pcre16) return;
1652    
1653     while(TRUE)
1654     {
1655     /* Swap previous characters. */
1656     while (length-- > 0)
1657     {
1658     *ptr = swap_uint16(*ptr);
1659     ptr++;
1660     }
1661     #ifdef SUPPORT_UTF
1662     if (utf16_char)
1663     {
1664 ph10 814 if ((ptr[-1] & 0xfc00) == 0xd800)
1665 ph10 813 {
1666     /* We know that there is only one extra character in UTF-16. */
1667     *ptr = swap_uint16(*ptr);
1668     ptr++;
1669     }
1670     }
1671     utf16_char = FALSE;
1672     #endif /* SUPPORT_UTF */
1673    
1674     /* Get next opcode. */
1675 ph10 814
1676 ph10 813 length = 0;
1677 ph10 814 op = *ptr;
1678 ph10 813 *ptr++ = swap_uint16(op);
1679 ph10 814
1680 ph10 813 switch (op)
1681     {
1682     case OP_END:
1683     return;
1684    
1685 zherczeg 816 #ifdef SUPPORT_UTF
1686 ph10 813 case OP_CHAR:
1687     case OP_CHARI:
1688     case OP_NOT:
1689     case OP_NOTI:
1690     case OP_STAR:
1691     case OP_MINSTAR:
1692     case OP_PLUS:
1693     case OP_MINPLUS:
1694     case OP_QUERY:
1695     case OP_MINQUERY:
1696     case OP_UPTO:
1697     case OP_MINUPTO:
1698     case OP_EXACT:
1699     case OP_POSSTAR:
1700     case OP_POSPLUS:
1701     case OP_POSQUERY:
1702     case OP_POSUPTO:
1703     case OP_STARI:
1704     case OP_MINSTARI:
1705     case OP_PLUSI:
1706     case OP_MINPLUSI:
1707     case OP_QUERYI:
1708     case OP_MINQUERYI:
1709     case OP_UPTOI:
1710     case OP_MINUPTOI:
1711     case OP_EXACTI:
1712     case OP_POSSTARI:
1713     case OP_POSPLUSI:
1714     case OP_POSQUERYI:
1715     case OP_POSUPTOI:
1716     case OP_NOTSTAR:
1717     case OP_NOTMINSTAR:
1718     case OP_NOTPLUS:
1719     case OP_NOTMINPLUS:
1720     case OP_NOTQUERY:
1721     case OP_NOTMINQUERY:
1722     case OP_NOTUPTO:
1723     case OP_NOTMINUPTO:
1724     case OP_NOTEXACT:
1725     case OP_NOTPOSSTAR:
1726     case OP_NOTPOSPLUS:
1727     case OP_NOTPOSQUERY:
1728     case OP_NOTPOSUPTO:
1729     case OP_NOTSTARI:
1730     case OP_NOTMINSTARI:
1731     case OP_NOTPLUSI:
1732     case OP_NOTMINPLUSI:
1733     case OP_NOTQUERYI:
1734     case OP_NOTMINQUERYI:
1735     case OP_NOTUPTOI:
1736     case OP_NOTMINUPTOI:
1737     case OP_NOTEXACTI:
1738     case OP_NOTPOSSTARI:
1739     case OP_NOTPOSPLUSI:
1740     case OP_NOTPOSQUERYI:
1741     case OP_NOTPOSUPTOI:
1742     if (utf) utf16_char = TRUE;
1743 ph10 814 #endif
1744 zherczeg 816 /* Fall through. */
1745    
1746     default:
1747 ph10 813 length = OP_lengths16[op] - 1;
1748     break;
1749    
1750     case OP_CLASS:
1751     case OP_NCLASS:
1752     /* Skip the character bit map. */
1753     ptr += 32/sizeof(pcre_uint16);
1754     length = 0;
1755     break;
1756    
1757     case OP_XCLASS:
1758     /* Reverse the size of the XCLASS instance. */
1759     ptr++;
1760     *ptr = swap_uint16(*ptr);
1761     if (LINK_SIZE > 1)
1762     {
1763     /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1764     ptr++;
1765     *ptr = swap_uint16(*ptr);
1766     }
1767     ptr++;
1768 ph10 814
1769 ph10 813 if (LINK_SIZE > 1)
1770     length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1771 ph10 814 (1 + LINK_SIZE + 1);
1772     else
1773 ph10 813 length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1774 ph10 814
1775 ph10 813 op = *ptr;
1776     *ptr = swap_uint16(op);
1777     if ((op & XCL_MAP) != 0)
1778     {
1779     /* Skip the character bit map. */
1780     ptr += 32/sizeof(pcre_uint16);
1781     length -= 32/sizeof(pcre_uint16);
1782     }
1783     break;
1784     }
1785     }
1786     /* Control should never reach here in 16 bit mode. */
1787     #endif /* SUPPORT_PCRE16 */
1788 ph10 814 }
1789 ph10 813
1790    
1791    
1792     /*************************************************
1793 nigel 87 * Check match or recursion limit *
1794     *************************************************/
1795    
1796     static int
1797 ph10 756 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1798 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1799     int flag, unsigned long int *limit, int errnumber, const char *msg)
1800     {
1801     int count;
1802     int min = 0;
1803     int mid = 64;
1804     int max = -1;
1805    
1806     extra->flags |= flag;
1807    
1808     for (;;)
1809     {
1810     *limit = mid;
1811    
1812 ph10 808 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1813 nigel 87 use_offsets, use_size_offsets);
1814    
1815     if (count == errnumber)
1816     {
1817     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1818     min = mid;
1819     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1820     }
1821    
1822     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1823     count == PCRE_ERROR_PARTIAL)
1824     {
1825     if (mid == min + 1)
1826     {
1827     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1828     break;
1829     }
1830     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1831     max = mid;
1832     mid = (min + mid)/2;
1833     }
1834     else break; /* Some other error */
1835     }
1836    
1837     extra->flags &= ~flag;
1838     return count;
1839     }
1840    
1841    
1842    
1843     /*************************************************
1844 ph10 227 * Case-independent strncmp() function *
1845     *************************************************/
1846    
1847     /*
1848     Arguments:
1849     s first string
1850     t second string
1851     n number of characters to compare
1852    
1853     Returns: < 0, = 0, or > 0, according to the comparison
1854     */
1855    
1856     static int
1857 ph10 756 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1858 ph10 227 {
1859     while (n--)
1860     {
1861     int c = tolower(*s++) - tolower(*t++);
1862     if (c) return c;
1863     }
1864     return 0;
1865     }
1866    
1867    
1868    
1869     /*************************************************
1870 nigel 91 * Check newline indicator *
1871     *************************************************/
1872    
1873 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1874     a message and return 0 if there is no match.
1875 nigel 91
1876     Arguments:
1877     p points after the leading '<'
1878     f file for error message
1879    
1880     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1881     */
1882    
1883     static int
1884 ph10 756 check_newline(pcre_uint8 *p, FILE *f)
1885 nigel 91 {
1886 ph10 756 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1887     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1888     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1889     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1890     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1891     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1892     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1893 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1894     return 0;
1895     }
1896    
1897    
1898    
1899     /*************************************************
1900 nigel 93 * Usage function *
1901     *************************************************/
1902    
/* Write the command-line summary to stdout. Which lines appear depends on the
SUPPORT_LIBREADLINE, SUPPORT_PCRE16, NODFA and NOPOSIX build settings. None of
the text contains a conversion specification, so it is written with fputs(). */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif

fputs(" -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1940    
1941    
1942    
1943     /*************************************************
1944 nigel 63 * Main Program *
1945     *************************************************/
1946 nigel 43
1947 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1948     consist of a regular expression, in delimiters and optionally followed by
1949     options, followed by a set of test data, terminated by an empty line. */
1950    
1951     int main(int argc, char **argv)
1952     {
1953     FILE *infile = stdin;
1954 ph10 815 const char *version;
1955 nigel 3 int options = 0;
1956     int study_options = 0;
1957 ph10 386 int default_find_match_limit = FALSE;
1958 nigel 3 int op = 1;
1959     int timeit = 0;
1960 nigel 93 int timeitm = 0;
1961 nigel 3 int showinfo = 0;
1962 nigel 31 int showstore = 0;
1963 ph10 667 int force_study = -1;
1964     int force_study_options = 0;
1965 nigel 87 int quiet = 0;
1966 nigel 53 int size_offsets = 45;
1967     int size_offsets_max;
1968 nigel 77 int *offsets = NULL;
1969 nigel 53 #if !defined NOPOSIX
1970 nigel 3 int posix = 0;
1971 nigel 53 #endif
1972 nigel 3 int debug = 0;
1973 nigel 11 int done = 0;
1974 nigel 77 int all_use_dfa = 0;
1975     int yield = 0;
1976 nigel 91 int stack_size;
1977 nigel 3
1978 ph10 667 pcre_jit_stack *jit_stack = NULL;
1979    
1980 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1981     that 1024 is plenty long enough for the few names we'll be testing. */
1982 nigel 69
1983 ph10 756 pcre_uchar copynames[1024];
1984     pcre_uchar getnames[1024];
1985 nigel 91
1986 ph10 756 pcre_uchar *copynamesptr;
1987     pcre_uchar *getnamesptr;
1988 nigel 91
1989 ph10 805 /* Get buffers from malloc() so that valgrind will check their misuse when
1990 ph10 808 debugging. They grow automatically when very long lines are read. The 16-bit
1991 ph10 805 buffer (buffer16) is obtained only if needed. */
1992 nigel 69
1993 ph10 756 buffer = (pcre_uint8 *)malloc(buffer_size);
1994     dbuffer = (pcre_uint8 *)malloc(buffer_size);
1995     pbuffer = (pcre_uint8 *)malloc(buffer_size);
1996 nigel 69
1997 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1998 nigel 3
1999 nigel 93 outfile = stdout;
2000    
2001     /* The following _setmode() stuff is some Windows magic that tells its runtime
2002     library to translate CRLF into a single LF character. At least, that's what
2003     I've been told: never having used Windows I take this all on trust. Originally
2004     it set 0x8000, but then I was advised that _O_BINARY was better. */
2005    
2006 nigel 75 #if defined(_WIN32) || defined(WIN32)
2007 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
2008     #endif
2009 nigel 75
2010 ph10 815 /* Get the version number: both pcre_version() and pcre16_version() give the
2011     same answer. We just need to ensure that we call one that is available. */
2012    
2013     #ifdef SUPPORT_PCRE8
2014     version = pcre_version();
2015     #else
2016     version = pcre16_version();
2017     #endif
2018    
2019 nigel 3 /* Scan options */
2020    
2021     while (argc > 1 && argv[op][0] == '-')
2022     {
2023 ph10 808 pcre_uint8 *endptr;
2024 nigel 53
2025 ph10 808 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2026 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2027 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
2028 ph10 667 {
2029     force_study = 1;
2030     force_study_options = PCRE_STUDY_JIT_COMPILE;
2031 ph10 691 }
2032 ph10 808 #ifdef SUPPORT_PCRE16
2033     else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
2034     #endif
2035    
2036 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2037 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2038 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2039     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2040 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2041 nigel 79 #if !defined NODFA
2042 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2043 nigel 79 #endif
2044 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2045 ph10 808 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2046 nigel 65 *endptr == 0))
2047 nigel 53 {
2048     op++;
2049     argc--;
2050     }
2051 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2052     {
2053     int both = argv[op][2] == 0;
2054     int temp;
2055 ph10 808 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2056 nigel 93 *endptr == 0))
2057     {
2058     timeitm = temp;
2059     op++;
2060     argc--;
2061     }
2062     else timeitm = LOOPREPEAT;
2063     if (both) timeit = timeitm;
2064     }
2065 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2066 ph10 808 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2067 nigel 91 *endptr == 0))
2068     {
2069 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2070 nigel 91 printf("PCRE: -S not supported on this OS\n");
2071     exit(1);
2072     #else
2073     int rc;
2074     struct rlimit rlim;
2075     getrlimit(RLIMIT_STACK, &rlim);
2076     rlim.rlim_cur = stack_size * 1024 * 1024;
2077     rc = setrlimit(RLIMIT_STACK, &rlim);
2078     if (rc != 0)
2079     {
2080     printf("PCRE: setrlimit() failed with error %d\n", rc);
2081     exit(1);
2082     }
2083     op++;
2084     argc--;
2085     #endif
2086     }
2087 nigel 53 #if !defined NOPOSIX
2088 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2089 nigel 53 #endif
2090 nigel 63 else if (strcmp(argv[op], "-C") == 0)
2091     {
2092     int rc;
2093 ph10 392 unsigned long int lrc;
2094 ph10 815 printf("PCRE version %s\n", version);
2095 nigel 63 printf("Compiled with\n");
2096 ph10 805
2097 ph10 808 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2098     are set, either both UTFs are supported or both are not supported. */
2099    
2100 ph10 805 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2101     printf(" 8-bit and 16-bit support\n");
2102 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2103 ph10 808 if (rc)
2104     printf(" UTF-8 and UTF-16 support\n");
2105 ph10 813 else
2106 ph10 808 printf(" No UTF-8 or UTF-16 support\n");
2107 ph10 805 #elif defined SUPPORT_PCRE8
2108     printf(" 8-bit support only\n");
2109     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2110     printf(" %sUTF-8 support\n", rc? "" : "No ");
2111 ph10 808 #else
2112 ph10 805 printf(" 16-bit support only\n");
2113     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2114     printf(" %sUTF-16 support\n", rc? "" : "No ");
2115 ph10 808 #endif
2116    
2117 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2118     printf(" %sUnicode properties support\n", rc? "" : "No ");
2119 ph10 667 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
2120 ph10 674 if (rc)
2121 ph10 689 printf(" Just-in-time compiler support\n");
2122 ph10 674 else
2123     printf(" No just-in-time compiler support\n");
2124 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
2125 ph10 391 /* Note that these values are always the ASCII values, even
2126 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
2127 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2128     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2129 ph10 150 (rc == -2)? "ANYCRLF" :
2130 nigel 93 (rc == -1)? "ANY" : "???");
2131 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
2132     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2133     "all Unicode newlines");
2134 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
2135     printf(" Internal link size = %d\n", rc);
2136     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2137     printf(" POSIX malloc threshold = %d\n", rc);
2138 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2139     printf(" Default match limit = %ld\n", lrc);
2140     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2141     printf(" Default recursion depth limit = %ld\n", lrc);
2142 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
2143     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2144 ph10 121 goto EXIT;
2145 nigel 63 }
2146 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
2147     strcmp(argv[op], "--help") == 0)
2148     {
2149     usage();
2150     goto EXIT;
2151     }
2152 nigel 3 else
2153     {
2154 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
2155 nigel 93 usage();
2156 nigel 77 yield = 1;
2157     goto EXIT;
2158 nigel 3 }
2159     op++;
2160     argc--;
2161     }
2162    
2163 nigel 53 /* Get the store for the offsets vector, and remember what it was */
2164    
2165     size_offsets_max = size_offsets;
2166 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2167 nigel 53 if (offsets == NULL)
2168     {
2169     printf("** Failed to get %d bytes of memory for offsets vector\n",
2170 ph10 151 (int)(size_offsets_max * sizeof(int)));
2171 nigel 77 yield = 1;
2172     goto EXIT;
2173 nigel 53 }
2174    
2175 nigel 3 /* Sort out the input and output files */
2176    
2177     if (argc > 1)
2178     {
2179 nigel 93 infile = fopen(argv[op], INPUT_MODE);
2180 nigel 3 if (infile == NULL)
2181     {
2182     printf("** Failed to open %s\n", argv[op]);
2183 nigel 77 yield = 1;
2184     goto EXIT;
2185 nigel 3 }
2186     }
2187    
2188     if (argc > 2)
2189     {
2190 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
2191 nigel 3 if (outfile == NULL)
2192     {
2193     printf("** Failed to open %s\n", argv[op+1]);
2194 nigel 77 yield = 1;
2195     goto EXIT;
2196 nigel 3 }
2197     }
2198    
2199     /* Set alternative malloc function */
2200    
2201 ph10 805 #ifdef SUPPORT_PCRE8
2202 nigel 3 pcre_malloc = new_malloc;
2203 nigel 73 pcre_free = new_free;
2204     pcre_stack_malloc = stack_malloc;
2205     pcre_stack_free = stack_free;
2206 ph10 805 #endif
2207 nigel 3
2208 ph10 805 #ifdef SUPPORT_PCRE16
2209     pcre16_malloc = new_malloc;
2210     pcre16_free = new_free;
2211     pcre16_stack_malloc = stack_malloc;
2212     pcre16_stack_free = stack_free;
2213     #endif
2214    
2215 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
2216 nigel 3
2217 ph10 815 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2218 nigel 3
2219     /* Main loop */
2220    
2221 nigel 11 while (!done)
2222 nigel 3 {
2223     pcre *re = NULL;
2224     pcre_extra *extra = NULL;
2225 nigel 37
2226     #if !defined NOPOSIX /* There are still compilers that require no indent */
2227 nigel 3 regex_t preg;
2228 nigel 45 int do_posix = 0;
2229 nigel 37 #endif
2230    
2231 nigel 7 const char *error;
2232 ph10 808 pcre_uint8 *markptr;
2233     pcre_uint8 *p, *pp, *ppp;
2234     pcre_uint8 *to_file = NULL;
2235     const pcre_uint8 *tables = NULL;
2236 nigel 75 unsigned long int true_size, true_study_size = 0;
2237     size_t size, regex_gotten_store;
2238 ph10 654 int do_allcaps = 0;
2239 ph10 512 int do_mark = 0;
2240 nigel 3 int do_study = 0;
2241 ph10 654 int no_force_study = 0;
2242 nigel 25 int do_debug = debug;
2243 nigel 35 int do_G = 0;
2244     int do_g = 0;
2245 nigel 25 int do_showinfo = showinfo;
2246 nigel 35 int do_showrest = 0;
2247 ph10 616 int do_showcaprest = 0;
2248 nigel 75 int do_flip = 0;
2249 nigel 93 int erroroffset, len, delimiter, poffset;
2250 nigel 3
2251 ph10 810 use_utf = 0;
2252 ph10 211 debug_lengths = 1;
2253 nigel 63
2254 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2255 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2256 nigel 63 fflush(outfile);
2257 nigel 3
2258     p = buffer;
2259     while (isspace(*p)) p++;
2260     if (*p == 0) continue;
2261    
2262 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
2263 nigel 3
2264 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2265     {
2266 nigel 91 unsigned long int magic, get_options;
2267 ph10 756 pcre_uint8 sbuf[8];
2268 nigel 75 FILE *f;
2269    
2270     p++;
2271     pp = p + (int)strlen((char *)p);
2272     while (isspace(pp[-1])) pp--;
2273     *pp = 0;
2274    
2275     f = fopen((char *)p, "rb");
2276     if (f == NULL)
2277     {
2278     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2279     continue;
2280     }
2281    
2282     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2283    
2284     true_size =
2285     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2286     true_study_size =
2287     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2288    
2289     re = (real_pcre *)new_malloc(true_size);
2290 ph10 801 regex_gotten_store = first_gotten_store;
2291 nigel 75
2292     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2293    
2294     magic = ((real_pcre *)re)->magic_number;
2295     if (magic != MAGIC_NUMBER)
2296     {
2297 ph10 813 if (swap_uint32(magic) == MAGIC_NUMBER)
2298 nigel 75 {
2299     do_flip = 1;
2300     }
2301     else
2302     {
2303     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2304     fclose(f);
2305     continue;
2306     }
2307     }
2308    
2309 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2310 nigel 75 do_flip? " (byte-inverted)" : "", p);
2311    
2312 ph10 612 /* Now see if there is any following study data. */
2313 nigel 75
2314     if (true_study_size != 0)
2315     {
2316     pcre_study_data *psd;
2317    
2318     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2319     extra->flags = PCRE_EXTRA_STUDY_DATA;
2320    
2321     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2322     extra->study_data = psd;
2323    
2324     if (fread(psd, 1, true_study_size, f) != true_study_size)
2325     {
2326     FAIL_READ:
2327     fprintf(outfile, "Failed to read data from %s\n", p);
2328 zherczeg 809 if (extra != NULL)
2329     {
2330     PCRE_FREE_STUDY(extra);
2331     }
2332 nigel 75 if (re != NULL) new_free(re);
2333     fclose(f);
2334     continue;
2335     }
2336     fprintf(outfile, "Study data loaded from %s\n", p);
2337     do_study = 1; /* To get the data output if requested */
2338     }
2339     else fprintf(outfile, "No study data\n");
2340    
2341 zherczeg 811 /* Flip the necessary bytes. */
2342 ph10 813 if (do_flip)
2343 zherczeg 811 {
2344     PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2345     }
2346    
2347     /* Need to know if UTF-8 for printing data strings */
2348    
2349     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2350     use_utf = (get_options & PCRE_UTF8) != 0;
2351    
2352 nigel 75 fclose(f);
2353     goto SHOW_INFO;
2354     }
2355    
2356     /* In-line pattern (the usual case). Get the delimiter and seek the end of
2357 zherczeg 811 the pattern; if it isn't complete, read more. */
2358 nigel 75
2359 nigel 3 delimiter = *p++;
2360    
2361 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
2362 nigel 3 {
2363 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2364 nigel 3 goto SKIP_DATA;
2365     }
2366    
2367     pp = p;
2368 ph10 530 poffset = (int)(p - buffer);
2369 nigel 3
2370     for(;;)
2371     {
2372 nigel 29 while (*pp != 0)
2373     {
2374     if (*pp == '\\' && pp[1] != 0) pp++;
2375     else if (*pp == delimiter) break;
2376     pp++;
2377     }
2378 nigel 3 if (*pp != 0) break;
2379 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2380 nigel 3 {
2381     fprintf(outfile, "** Unexpected EOF\n");
2382 nigel 11 done = 1;
2383     goto CONTINUE;
2384 nigel 3 }
2385 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2386 nigel 3 }
2387    
2388 nigel 93 /* The buffer may have moved while being extended; reset the start of data
2389     pointer to the correct relative point in the buffer. */
2390    
2391     p = buffer + poffset;
2392    
2393 nigel 29 /* If the first character after the delimiter is backslash, make
2394     the pattern end with backslash. This is purely to provide a way
2395     of testing for the error message when a pattern ends with backslash. */
2396    
2397     if (pp[1] == '\\') *pp++ = '\\';
2398    
2399 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2400     for callouts. */
2401 nigel 3
2402     *pp++ = 0;
2403 nigel 75 strcpy((char *)pbuffer, (char *)p);
2404 nigel 3
2405     /* Look for options after final delimiter */
2406    
2407     options = 0;
2408 ph10 801 study_options = 0;
2409 nigel 31 log_store = showstore; /* default from command line */
2410    
2411 nigel 3 while (*pp != 0)
2412     {
2413     switch (*pp++)
2414     {
2415 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
2416 nigel 35 case 'g': do_g = 1; break;
2417 nigel 3 case 'i': options |= PCRE_CASELESS; break;
2418     case 'm': options |= PCRE_MULTILINE; break;
2419     case 's': options |= PCRE_DOTALL; break;
2420     case 'x': options |= PCRE_EXTENDED; break;
2421 nigel 25
2422 ph10 616 case '+':
2423 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2424 ph10 616 break;
2425 ph10 654
2426     case '=': do_allcaps = 1; break;
2427 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
2428 nigel 93 case 'B': do_debug = 1; break;
2429 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2430 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
2431 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2432 nigel 75 case 'F': do_flip = 1; break;
2433 nigel 35 case 'G': do_G = 1; break;
2434 nigel 25 case 'I': do_showinfo = 1; break;
2435 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
2436 ph10 512 case 'K': do_mark = 1; break;
2437 nigel 31 case 'M': log_store = 1; break;
2438 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2439 nigel 37
2440     #if !defined NOPOSIX
2441 nigel 3 case 'P': do_posix = 1; break;
2442 nigel 37 #endif
2443    
2444 ph10 654 case 'S':
2445 ph10 691 if (do_study == 0)
2446 ph10 612 {
2447 ph10 691 do_study = 1;
2448 ph10 667 if (*pp == '+')
2449     {
2450     study_options |= PCRE_STUDY_JIT_COMPILE;
2451 ph10 691 pp++;
2452     }
2453     }
2454 ph10 667 else
2455     {
2456 ph10 612 do_study = 0;
2457     no_force_study = 1;
2458 ph10 654 }
2459 ph10 612 break;
2460    
2461 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
2462 ph10 535 case 'W': options |= PCRE_UCP; break;
2463 nigel 3 case 'X': options |= PCRE_EXTRA; break;
2464 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2465 ph10 126 case 'Z': debug_lengths = 0; break;
2466 ph10 810 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2467 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2468 ph10 545
2469 ph10 541 case 'T':
2470     switch (*pp++)
2471     {
2472     case '0': tables = tables0; break;
2473     case '1': tables = tables1; break;
2474 ph10 545
2475 ph10 541 case '\r':
2476     case '\n':
2477 ph10 545 case ' ':
2478     case 0:
2479 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
2480 ph10 545 goto SKIP_DATA;
2481    
2482     default:
2483 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2484 ph10 545 goto SKIP_DATA;
2485 ph10 541 }
2486 ph10 545 break;
2487 nigel 25
2488     case 'L':
2489     ppp = pp;
2490 nigel 93 /* The '\r' test here is so that it works on Windows. */
2491     /* The '0' test is just in case this is an unterminated line. */
2492     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2493 nigel 25 *ppp = 0;
2494     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2495     {
2496     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2497     goto SKIP_DATA;
2498     }
2499 nigel 93 locale_set = 1;
2500 nigel 25 tables = pcre_maketables();
2501     pp = ppp;
2502     break;
2503    
2504 nigel 75 case '>':
2505     to_file = pp;
2506     while (*pp != 0) pp++;
2507     while (isspace(pp[-1])) pp--;
2508     *pp = 0;
2509     break;
2510    
2511 nigel 91 case '<':
2512     {
2513 ph10 756 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2514 ph10 336 {
2515     options |= PCRE_JAVASCRIPT_COMPAT;
2516 ph10 345 pp += 3;
2517 ph10 336 }
2518     else
2519 ph10 345 {
2520 ph10 336 int x = check_newline(pp, outfile);
2521     if (x == 0) goto SKIP_DATA;
2522     options |= x;
2523     while (*pp++ != '>');
2524 ph10 345 }
2525 nigel 91 }
2526     break;
2527    
2528 nigel 77 case '\r': /* So that it works in Windows */
2529     case '\n':
2530     case ' ':
2531     break;
2532 nigel 75
2533 nigel 3 default:
2534     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2535     goto SKIP_DATA;
2536     }
2537     }
2538    
2539 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
2540 nigel 25 timing, showing, or debugging options, nor the ability to pass over
2541 ph10 805 local character tables. Neither does it have 16-bit support. */
2542 nigel 3
2543 nigel 37 #if !defined NOPOSIX
2544 nigel 3 if (posix || do_posix)
2545     {
2546     int rc;
2547     int cflags = 0;
2548 nigel 75
2549 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2550     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2551 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2552 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2553     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2554 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2555 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2556 nigel 87
2557 ph10 801 first_gotten_store = 0;
2558 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
2559    
2560     /* Compilation failed; go back for another re, skipping to blank line
2561     if non-interactive. */
2562    
2563     if (rc != 0)
2564     {
2565 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2566 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2567     goto SKIP_DATA;
2568     }
2569     }
2570    
2571     /* Handle compiling via the native interface */
2572    
2573     else
2574 nigel 37 #endif /* !defined NOPOSIX */
2575    
2576 nigel 3 {
2577 ph10 412 unsigned long int get_options;
2578 ph10 808
2579     /* In 16-bit mode, convert the input. */
2580    
2581 ph10 805 #ifdef SUPPORT_PCRE16
2582 ph10 813 if (use_pcre16)
2583 ph10 808 {
2584 ph10 810 if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2585     {
2586     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2587 ph10 813 "converted to UTF-16\n");
2588     goto SKIP_DATA;
2589     }
2590     p = (pcre_uint8 *)buffer16;
2591     }
2592 ph10 805 #endif
2593 ph10 416
2594 ph10 805 /* Compile many times when timing */
2595    
2596 nigel 93 if (timeit > 0)
2597 nigel 3 {
2598     register int i;
2599     clock_t time_taken;
2600     clock_t start_time = clock();
2601 nigel 93 for (i = 0; i < timeit; i++)
2602 nigel 3 {
2603 ph10 808 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2604 nigel 3 if (re != NULL) free(re);
2605     }
2606     time_taken = clock() - start_time;
2607 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
2608     (((double)time_taken * 1000.0) / (double)timeit) /
2609 nigel 63 (double)CLOCKS_PER_SEC);
2610 nigel 3 }
2611    
2612 ph10 801 first_gotten_store = 0;
2613 ph10 808 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2614 nigel 3
2615     /* Compilation failed; go back for another re, skipping to blank line
2616     if non-interactive. */
2617    
2618     if (re == NULL)
2619     {
2620     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2621     SKIP_DATA:
2622     if (infile != stdin)
2623     {
2624     for (;;)
2625     {
2626 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
2627 nigel 11 {
2628     done = 1;
2629     goto CONTINUE;
2630     }
2631 nigel 3 len = (int)strlen((char *)buffer);
2632     while (len > 0 && isspace(buffer[len-1])) len--;
2633     if (len == 0) break;
2634     }
2635     fprintf(outfile, "\n");
2636     }
2637 nigel 25 goto CONTINUE;
2638 nigel 3 }
2639 ph10 416
2640     /* Compilation succeeded. It is now possible to set the UTF-8 option from
2641     within the regex; check for this so that we know how to process the data
2642 ph10 412 lines. */
2643 ph10 416
2644 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2645 ph10 810 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2646 nigel 3
2647 ph10 801 /* Extract the size for possible writing before possibly flipping it,
2648     and remember the store that was got. */
2649 nigel 3
2650 ph10 801 true_size = ((real_pcre *)re)->size;
2651     regex_gotten_store = first_gotten_store;
2652    
2653     /* Output code size information if requested */
2654    
2655 nigel 63 if (log_store)
2656     fprintf(outfile, "Memory allocation (code space): %d\n",
2657 ph10 801 (int)(first_gotten_store -
2658 nigel 63 sizeof(real_pcre) -
2659     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2660    
2661 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
2662 ph10 654 help with the matching, unless the pattern has the SS option, which
2663 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
2664     never sensible). */
2665 nigel 75
2666 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
2667 nigel 75 {
2668 nigel 93 if (timeit > 0)
2669 nigel 75 {
2670     register int i;
2671     clock_t time_taken;
2672     clock_t start_time = clock();
2673 nigel 93 for (i = 0; i < timeit; i++)
2674 ph10 805 {
2675 ph10 808 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2676     }
2677 nigel 75 time_taken = clock() - start_time;
2678 zherczeg 809 if (extra != NULL)
2679     {
2680     PCRE_FREE_STUDY(extra);
2681     }
2682 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
2683     (((double)time_taken * 1000.0) / (double)timeit) /
2684 nigel 75 (double)CLOCKS_PER_SEC);
2685     }
2686 ph10 808 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2687 nigel 75 if (error != NULL)
2688     fprintf(outfile, "Failed to study: %s\n", error);
2689     else if (extra != NULL)
2690 ph10 801 {
2691 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2692 ph10 801 if (log_store)
2693     {
2694     size_t jitsize;
2695     new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2696     if (jitsize != 0)
2697 zherczeg 816 fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2698 ph10 801 }
2699     }
2700 nigel 75 }
2701 ph10 512
2702 ph10 510 /* If /K was present, we set up for handling MARK data. */
2703 ph10 512
2704 ph10 510 if (do_mark)
2705     {
2706     if (extra == NULL)
2707     {
2708     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2709     extra->flags = 0;
2710     }
2711 ph10 512 extra->mark = &markptr;
2712 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
2713 ph10 512 }
2714 nigel 75
2715 ph10 805 /* Extract and display information from the compiled data if required. */
2716 nigel 75
2717     SHOW_INFO:
2718    
2719 nigel 93 if (do_debug)
2720     {
2721     fprintf(outfile, "------------------------------------------------------------------\n");
2722 zherczeg 809 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2723 ph10 805 if (use_pcre16)
2724     pcre16_printint(re, outfile, debug_lengths);
2725 ph10 808 else
2726 ph10 805 pcre_printint(re, outfile, debug_lengths);
2727 zherczeg 809 #elif defined SUPPORT_PCRE8
2728     pcre_printint(re, outfile, debug_lengths);
2729     #else
2730     pcre16_printint(re, outfile, debug_lengths);
2731     #endif
2732 nigel 93 }
2733 ph10 416
2734 ph10 412 /* We already have the options in get_options (see above) */
2735 nigel 93
2736 nigel 25 if (do_showinfo)
2737 nigel 3 {
2738 ph10 412 unsigned long int all_options;
2739 nigel 79 #if !defined NOINFOCHECK
2740 nigel 43 int old_first_char, old_options, old_count;
2741 nigel 79 #endif
2742 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2743 ph10 227 hascrorlf;
2744 nigel 63 int nameentrysize, namecount;
2745 ph10 756 const pcre_uchar *nametable;
2746 nigel 3
2747 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2748     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2749     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2750 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2751 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2752 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2753     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2754 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2755 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2756     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2757 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2758 nigel 43
2759 ph10 805 /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2760     that it gives the same results as the new function. */
2761 ph10 808
2762 nigel 79 #if !defined NOINFOCHECK
2763 ph10 805 if (!use_pcre16)
2764 ph10 808 {
2765 ph10 805 old_count = pcre_info(re, &old_options, &old_first_char);
2766     if (count < 0) fprintf(outfile,
2767     "Error %d from pcre_info()\n", count);
2768     else
2769     {
2770     if (old_count != count) fprintf(outfile,
2771     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2772     old_count);
2773 ph10 808
2774 ph10 805 if (old_first_char != first_char) fprintf(outfile,
2775     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2776     first_char, old_first_char);
2777 ph10 808
2778 ph10 805 if (old_options != (int)get_options) fprintf(outfile,
2779     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2780     get_options, old_options);
2781     }
2782 ph10 808 }
2783 nigel 79 #endif
2784 nigel 43
2785 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
2786 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2787 nigel 77 (int)size, (int)regex_gotten_store);
2788 nigel 43
2789     fprintf(outfile, "Capturing subpattern count = %d\n", count);
2790     if (backrefmax > 0)
2791     fprintf(outfile, "Max back reference = %d\n", backrefmax);
2792 nigel 63
2793     if (namecount > 0)
2794     {
2795     fprintf(outfile, "Named capturing subpatterns:\n");
2796     while (namecount-- > 0)
2797     {
2798 zherczeg 816 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2799     int imm2_size = use_pcre16 ? 1 : 2;
2800     #else
2801     int imm2_size = IMM2_SIZE;
2802     #endif
2803     int length = (int)STRLEN(nametable + imm2_size);
2804     fprintf(outfile, " ");
2805     PCHARSV(nametable + imm2_size, length, outfile);
2806     while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
2807     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2808     fprintf(outfile, "%3d\n", use_pcre16?
2809     (int)nametable[0] : ((int)nametable[0] << 8) | (int)nametable[1]);
2810     nametable += nameentrysize * (use_pcre16 ? 2 : 1);
2811     #else
2812     fprintf(outfile, "%3d\n", GET2(nametable, 0));
2813 nigel 63 nametable += nameentrysize;
2814 zherczeg 816 #endif
2815 nigel 63 }
2816     }
2817 ph10 172
2818 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2819 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2820 nigel 63
2821 nigel 75 all_options = ((real_pcre *)re)->options;
2822 ph10 813 if (do_flip) all_options = swap_uint32(all_options);
2823 nigel 75
2824 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
2825 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2826 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2827     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2828     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2829     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2830 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2831 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2832 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2833     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2834 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2835     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2836     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2837 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2838 ph10 810 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2839 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2840 ph10 810 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2841 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2842 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2843 ph10 172
2844 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2845 nigel 43
2846 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2847 nigel 91 {
2848     case PCRE_NEWLINE_CR:
2849     fprintf(outfile, "Forced newline sequence: CR\n");
2850     break;
2851 nigel 43
2852 nigel 91 case PCRE_NEWLINE_LF:
2853     fprintf(outfile, "Forced newline sequence: LF\n");
2854     break;
2855    
2856     case PCRE_NEWLINE_CRLF:
2857     fprintf(outfile, "Forced newline sequence: CRLF\n");
2858     break;
2859    
2860 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2861     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2862     break;
2863    
2864 nigel 93 case PCRE_NEWLINE_ANY:
2865     fprintf(outfile, "Forced newline sequence: ANY\n");
2866     break;
2867    
2868 nigel 91 default:
2869     break;
2870     }
2871    
2872 nigel 43 if (first_char == -1)
2873     {
2874 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2875 nigel 43 }
2876     else if (first_char < 0)
2877     {
2878     fprintf(outfile, "No first char\n");
2879     }
2880     else
2881     {
2882 zherczeg 774 const char *caseless =
2883     ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2884 nigel 63 "" : " (caseless)";
2885 ph10 813
2886 ph10 808 if (PRINTOK(first_char))
2887 zherczeg 774 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2888 nigel 3 else
2889 ph10 813 {
2890 ph10 810 fprintf(outfile, "First char = ");
2891 ph10 813 pchar(first_char, outfile);
2892 ph10 810 fprintf(outfile, "%s\n", caseless);
2893 ph10 813 }
2894 nigel 43 }
2895 nigel 37
2896 nigel 43 if (need_char < 0)
2897     {
2898     fprintf(outfile, "No need char\n");
2899 nigel 3 }
2900 nigel 43 else
2901     {
2902 zherczeg 774 const char *caseless =
2903     ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2904 nigel 63 "" : " (caseless)";
2905 zherczeg 774
2906 ph10 808 if (PRINTOK(need_char))
2907 zherczeg 774 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2908 nigel 43 else
2909 ph10 814 {
2910     fprintf(outfile, "Need char = ");
2911     pchar(need_char, outfile);
2912     fprintf(outfile, "%s\n", caseless);
2913     }
2914 nigel 43 }
2915 nigel 75
2916     /* Don't output study size; at present it is in any case a fixed
2917     value, but it varies, depending on the computer architecture, and
2918     so messes up the test suite. (And with the /F option, it might be
2919 ph10 654 flipped.) If study was forced by an external -s, don't show this
2920 ph10 612 information unless -i or -d was also present. This means that, except
2921     when auto-callouts are involved, the output from runs with and without
2922     -s should be identical. */
2923 nigel 75
2924 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2925 nigel 75 {
2926     if (extra == NULL)
2927     fprintf(outfile, "Study returned NULL\n");
2928     else
2929     {
2930 ph10 756 pcre_uint8 *start_bits = NULL;
2931 ph10 455 int minlength;
2932 ph10 461
2933 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2934 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2935    
2936 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2937     if (start_bits == NULL)
2938 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2939 nigel 75 else
2940     {
2941     int i;
2942     int c = 24;
2943     fprintf(outfile, "Starting byte set: ");
2944     for (i = 0; i < 256; i++)
2945     {
2946     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2947     {
2948     if (c > 75)
2949     {
2950     fprintf(outfile, "\n ");
2951     c = 2;
2952     }
2953 ph10 808 if (PRINTOK(i) && i != ' ')
2954 nigel 75 {
2955     fprintf(outfile, "%c ", i);
2956     c += 2;
2957     }
2958     else
2959     {
2960     fprintf(outfile, "\\x%02x ", i);
2961     c += 5;
2962     }
2963     }
2964     }
2965     fprintf(outfile, "\n");
2966     }
2967     }
2968 ph10 691
2969 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
2970 ph10 691
2971 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2972     {
2973 ph10 691 int jit;
2974 ph10 667 new_info(re, extra, PCRE_INFO_JIT, &jit);
2975 ph10 691 if (jit)
2976     fprintf(outfile, "JIT study was successful\n");
2977     else
2978     #ifdef SUPPORT_JIT
2979     fprintf(outfile, "JIT study was not successful\n");
2980 ph10 667 #else
2981 ph10 691 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2982 ph10 667 #endif
2983 ph10 691 }
2984 nigel 75 }
2985 nigel 3 }
2986    
2987 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2988     that is all. The first 8 bytes of the file are the regex length and then
2989     the study length, in big-endian order. */
2990 nigel 3
2991 nigel 75 if (to_file != NULL)
2992 nigel 3 {
2993 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2994     if (f == NULL)
2995 nigel 3 {
2996 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2997 nigel 3 }
2998 nigel 75 else
2999     {
3000 ph10 756 pcre_uint8 sbuf[8];
3001 ph10 814
3002 ph10 813 if (do_flip) regexflip(re, extra);
3003 ph10 756 sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3004     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3005     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3006     sbuf[3] = (pcre_uint8)((true_size) & 255);
3007     sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3008     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3009     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3010     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3011 nigel 3
3012 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
3013     fwrite(re, 1, true_size, f) < true_size)
3014     {
3015     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3016     }
3017 nigel 3 else
3018     {
3019 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3020 ph10 654
3021 ph10 658 /* If there is study data, write it. */
3022 ph10 654
3023 nigel 75 if (extra != NULL)
3024 nigel 3 {
3025 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
3026     true_study_size)
3027 nigel 3 {
3028 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
3029     strerror(errno));
3030 nigel 3 }
3031 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
3032 nigel 3 }
3033     }
3034 nigel 75 fclose(f);
3035 nigel 3 }
3036 nigel 77
3037     new_free(re);
3038 ph10 813 if (extra != NULL)
3039 zherczeg 809 {
3040     PCRE_FREE_STUDY(extra);
3041     }
3042 ph10 545 if (locale_set)
3043 ph10 541 {
3044     new_free((void *)tables);
3045     setlocale(LC_CTYPE, "C");
3046 ph10 545 locale_set = 0;
3047     }
3048 nigel 75 continue; /* With next regex */
3049 nigel 3 }
3050 nigel 75 } /* End of non-POSIX compile */
3051 nigel 3
3052     /* Read data lines and test them */
3053    
3054     for (;;)
3055     {
3056 ph10 756 pcre_uint8 *q;
3057     pcre_uint8 *bptr;
3058 nigel 57 int *use_offsets = offsets;
3059 nigel 53 int use_size_offsets = size_offsets;
3060 nigel 63 int callout_data = 0;
3061     int callout_data_set = 0;
3062 nigel 3 int count, c;
3063 nigel 29 int copystrings = 0;
3064 ph10 386 int find_match_limit = default_find_match_limit;
3065 nigel 29 int getstrings = 0;
3066     int getlist = 0;
3067 nigel 39 int gmatched = 0;
3068 nigel 35 int start_offset = 0;
3069 ph10 579 int start_offset_sign = 1;
3070 nigel 41 int g_notempty = 0;
3071 nigel 77 int use_dfa = 0;
3072 nigel 3
3073     options = 0;
3074    
3075 nigel 91 *copynames = 0;
3076     *getnames = 0;
3077    
3078     copynamesptr = copynames;
3079     getnamesptr = getnames;
3080    
3081 zherczeg 816 SET_PCRE_CALLOUT(callout);
3082 nigel 63 first_callout = 1;
3083 ph10 654 last_callout_mark = NULL;
3084 nigel 63 callout_extra = 0;
3085     callout_count = 0;
3086     callout_fail_count = 999999;
3087     callout_fail_id = -1;
3088 nigel 73 show_malloc = 0;
3089 nigel 63
3090 nigel 91 if (extra != NULL) extra->flags &=
3091     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3092    
3093     len = 0;
3094     for (;;)
3095 nigel 11 {
3096 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3097 nigel 91 {
3098 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
3099     {
3100 ph10 545 fprintf(outfile, "\n");
3101 ph10 537 break;
3102 ph10 545 }
3103 nigel 91 done = 1;
3104     goto CONTINUE;
3105     }
3106     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3107     len = (int)strlen((char *)buffer);
3108     if (buffer[len-1] == '\n') break;
3109 nigel 11 }
3110 nigel 3
3111     while (len > 0 && isspace(buffer[len-1])) len--;
3112     buffer[len] = 0;
3113     if (len == 0) break;
3114    
3115     p = buffer;
3116     while (isspace(*p)) p++;
3117    
3118 ph10 147 bptr = q = dbuffer;
3119 nigel 3 while ((c = *p++) != 0)
3120     {
3121     int i = 0;
3122     int n = 0;
3123 nigel 63
3124 nigel 3 if (c == '\\') switch ((c = *p++))
3125     {
3126     case 'a': c = 7; break;
3127     case 'b': c = '\b'; break;
3128     case 'e': c = 27; break;
3129     case 'f': c = '\f'; break;
3130     case 'n': c = '\n'; break;
3131     case 'r': c = '\r'; break;
3132     case 't': c = '\t'; break;
3133     case 'v': c = '\v'; break;
3134    
3135     case '0': case '1': case '2': case '3':
3136     case '4': case '5': case '6': case '7':
3137     c -= '0';
3138     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3139     c = c * 8 + *p++ - '0';
3140 nigel 91
3141     #if !defined NOUTF8
3142 ph10 810 if (use_utf && c > 255)
3143 nigel 91 {
3144 ph10 808 pcre_uint8 buff8[8];
3145 nigel 91 int ii, utn;
3146     utn = ord2utf8(c, buff8);
3147     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3148     c = buff8[ii]; /* Last byte */
3149     }
3150     #endif
3151 nigel 3 break;
3152    
3153     case 'x':
3154 nigel 49
3155     /* Handle \x{..} specially - new Perl thing for utf8 */
3156    
3157 nigel 79 #if !defined NOUTF8
3158 nigel 49 if (*p == '{')
3159     {
3160 ph10 808 pcre_uint8 *pt = p;
3161 nigel 49 c = 0;
3162 ph10 738
3163 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3164     when isxdigit() is a macro that refers to its argument more than
3165     once. This is banned by the C Standard, but apparently happens in at
3166     least one MacOS environment. */
3167 ph10 738
3168 ph10 735 for (pt++; isxdigit(*pt); pt++)
3169 ph10 734 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3170 nigel 49 if (*pt == '}')
3171     {
3172 ph10 808 pcre_uint8 buff8[8];
3173 nigel 49 int ii, utn;
3174 ph10 810 if (use_utf)
3175 ph10 358 {
3176 ph10 355 utn = ord2utf8(c, buff8);
3177     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
3178     c = buff8[ii]; /* Last byte */
3179     }
3180     else
3181     {
3182 ph10 358 if (c > 255)
3183 ph10 814 {
3184     if (use_pcre16)
3185     fprintf(outfile, "** Character \\x{%x} is greater than 255.\n"
3186     "** Because its input is first processed as 8-bit, pcretest "
3187     "does not\n** support such characters in 16-bit mode when "
3188     "UTF-16 is not set.\n", c);
3189     else
3190     fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3191     "and UTF-8 mode is not enabled.\n", c);
3192    
3193     fprintf(outfile, "** Truncation will probably give the wrong "
3194     "result.\n");
3195     }
3196 ph10 358 }
3197 nigel 49 p = pt + 1;
3198     break;
3199     }
3200     /* Not correct form; fall through */
3201     }
3202 nigel 79 #endif
3203 nigel 49
3204     /* Ordinary \x */
3205    
3206 nigel 3 c = 0;
3207     while (i++ < 2 && isxdigit(*p))
3208     {
3209 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3210 nigel 3 p++;
3211     }
3212     break;
3213    
3214 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
3215 nigel 3 p--;
3216     continue;
3217    
3218 nigel 75 case '>':
3219 ph10 579 if (*p == '-')
3220 ph10 567 {
3221     start_offset_sign = -1;
3222     p++;
3223 ph10 579 }
3224 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3225 ph10 579 start_offset *= start_offset_sign;
3226 nigel 75 continue;
3227    
3228 nigel 3 case 'A': /* Option setting */
3229     options |= PCRE_ANCHORED;
3230     continue;
3231    
3232     case 'B':
3233     options |= PCRE_NOTBOL;
3234     continue;
3235    
3236 nigel 29 case 'C':
3237 nigel 63 if (isdigit(*p)) /* Set copy string */
3238     {
3239     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3240     copystrings |= 1 << n;
3241     }
3242     else if (isalnum(*p))
3243     {
3244 zherczeg 816 pcre_uchar *namestart = copynamesptr;
3245     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3246     if (use_pcre16)
3247     {
3248     PCRE_SCHAR16 *npp = (PCRE_SCHAR16 *)copynamesptr;
3249     while (isalnum(*p)) *npp++ = *p++;
3250     *npp++ = 0;
3251     *npp = 0;
3252     PCRE_GET_STRINGNUMBER(n, re, copynamesptr);
3253     copynamesptr = (pcre_uchar *)npp;
3254     }
3255     else
3256     {
3257     #endif
3258     pcre_uchar *npp = copynamesptr;
3259     while (isalnum(*p)) *npp++ = *p++;
3260     *npp++ = 0;
3261     *npp = 0;
3262     PCRE_GET_STRINGNUMBER(n, re, copynamesptr);
3263     copynamesptr = npp;
3264     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3265     }
3266     #endif
3267 nigel 63 if (n < 0)
3268 zherczeg 816 {
3269     fprintf(outfile, "no parentheses with name \"");
3270 zherczeg 817 PCHARSV(namestart, -1, outfile);
3271 zherczeg 816 fprintf(outfile, "\"\n");
3272     }
3273 nigel 63 }
3274     else if (*p == '+')
3275     {
3276     callout_extra = 1;
3277     p++;
3278     }
3279     else if (*p == '-')
3280     {
3281 zherczeg 816 SET_PCRE_CALLOUT(NULL);
3282 nigel 63 p++;
3283     }
3284     else if (*p == '!')
3285     {
3286     callout_fail_id = 0;
3287     p++;
3288     while(isdigit(*p))
3289     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3290     callout_fail_count = 0;
3291     if (*p == '!')
3292     {
3293     p++;
3294     while(isdigit(*p))
3295     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3296     }
3297     }
3298     else if (*p == '*')
3299     {
3300     int sign = 1;
3301     callout_data = 0;
3302     if (*(++p) == '-') { sign = -1; p++; }
3303     while(isdigit(*p))
3304     callout_data = callout_data * 10 + *p++ - '0';
3305     callout_data *= sign;
3306     callout_data_set = 1;
3307     }
3308 nigel 29 continue;
3309    
3310 nigel 79 #if !defined NODFA
3311 nigel 77 case 'D':
3312 nigel 79 #if !defined NOPOSIX
3313 nigel 77 if (posix || do_posix)
3314     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3315     else
3316 nigel 79 #endif
3317 nigel 77 use_dfa = 1;
3318     continue;
3319 ph10 553 #endif
3320 nigel 77
3321 ph10 553 #if !defined NODFA
3322 nigel 77 case 'F':
3323     options |= PCRE_DFA_SHORTEST;
3324     continue;
3325 nigel 79 #endif
3326 nigel 77
3327 nigel 29 case 'G':
3328 nigel 63 if (isdigit(*p))
3329     {
3330     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3331     getstrings |= 1 << n;
3332     }
3333     else if (isalnum(*p))
3334     {
3335 zherczeg 817 pcre_uchar *namestart = getnamesptr;
3336     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3337     if (use_pcre16)
3338     {
3339     PCRE_SCHAR16 *npp = (PCRE_SCHAR16 *)getnamesptr;
3340     while (isalnum(*p)) *npp++ = *p++;
3341     *npp++ = 0;
3342     *npp = 0;
3343     PCRE_GET_STRINGNUMBER(n, re, getnamesptr);
3344     getnamesptr = (pcre_uchar *)npp;
3345     }
3346     else
3347     {
3348     #endif
3349     pcre_uchar *npp = getnamesptr;
3350     while (isalnum(*p)) *npp++ = *p++;
3351     *npp++ = 0;
3352     *npp = 0;
3353     PCRE_GET_STRINGNUMBER(n, re, getnamesptr);
3354     getnamesptr = npp;
3355     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3356     }
3357     #endif
3358 nigel 63 if (n < 0)
3359 zherczeg 817 {
3360     fprintf(outfile, "no parentheses with name \"");
3361     PCHARSV(namestart, -1, outfile);
3362     fprintf(outfile, "\"\n");
3363     }
3364 nigel 63 }
3365 nigel 29 continue;
3366 ph10 691
3367 ph10 667 case 'J':
3368     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3369 ph10 691 if (extra != NULL
3370     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3371 ph10 667 && extra->executable_jit != NULL)
3372 ph10 691 {
3373 zherczeg 817 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3374     jit_stack = pcre_jit_stack_alloc(1, n * 1024);
3375     pcre_assign_jit_stack(extra, jit_callback, jit_stack);
3376 ph10 691 }
3377 ph10 667 continue;
3378 nigel 29
3379     case 'L':
3380     getlist = 1;
3381     continue;
3382    
3383 nigel 63 case 'M':
3384     find_match_limit = 1;
3385     continue;
3386    
3387 nigel 37 case 'N':
3388 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
3389     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3390 ph10 461 else
3391 ph10 442 options |= PCRE_NOTEMPTY;
3392 nigel 37 continue;
3393    
3394 nigel 3 case 'O':
3395     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3396 nigel 53 if (n > size_offsets_max)
3397     {
3398     size_offsets_max = n;
3399 nigel 57 free(offsets);
3400 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3401 nigel 53 if (offsets == NULL)
3402     {
3403     printf("** Failed to get %d bytes of memory for offsets vector\n",
3404 ph10 151 (int)(size_offsets_max * sizeof(int)));
3405 nigel 77 yield = 1;
3406     goto EXIT;
3407 nigel 53 }
3408     }
3409     use_size_offsets = n;
3410 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3411 nigel 3 continue;
3412    
3413 nigel 75 case 'P':
3414 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3415 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3416 nigel 75 continue;
3417    
3418 nigel 91 case 'Q':
3419     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3420     if (extra == NULL)
3421     {
3422     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3423     extra->flags = 0;
3424     }
3425     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3426     extra->match_limit_recursion = n;
3427     continue;
3428    
3429     case 'q':
3430     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3431     if (extra == NULL)
3432     {
3433     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3434     extra->flags = 0;
3435     }
3436     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3437     extra->match_limit = n;
3438     continue;
3439    
3440 nigel 79 #if !defined NODFA
3441 nigel 77 case 'R':
3442     options |= PCRE_DFA_RESTART;
3443     continue;
3444 nigel 79 #endif
3445 nigel 77
3446 nigel 73 case 'S':
3447     show_malloc = 1;
3448     continue;
3449 ph10 392
3450 ph10 389 case 'Y':
3451     options |= PCRE_NO_START_OPTIMIZE;
3452 ph10 392 continue;
3453 nigel 73
3454 nigel 3 case 'Z':
3455     options |= PCRE_NOTEOL;
3456     continue;
3457 nigel 71
3458     case '?':
3459     options |= PCRE_NO_UTF8_CHECK;
3460     continue;
3461 nigel 91
3462     case '<':
3463     {
3464     int x = check_newline(p, outfile);
3465     if (x == 0) goto NEXT_DATA;
3466     options |= x;
3467     while (*p++ != '>');
3468     }
3469     continue;
3470 nigel 3 }
3471 nigel 9 *q++ = c;
3472 nigel 3 }
3473 nigel 9 *q = 0;
3474 ph10 530 len = (int)(q - dbuffer);
3475 ph10 545
3476 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
3477 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3478 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
3479 ph10 371
3480 ph10 363 #if !defined NOPOSIX
3481     if (posix || do_posix)
3482     {
3483     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3484 ph10 371 bptr += buffer_size - len - 1;
3485 ph10 363 }
3486 ph10 371 else
3487     #endif
3488 ph10 363 {
3489     memmove(bptr + buffer_size - len, bptr, len);
3490 ph10 371 bptr += buffer_size - len;
3491     }
3492 nigel 3
3493 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
3494     {
3495     printf("**Match limit not relevant for DFA matching: ignored\n");
3496     find_match_limit = 0;
3497     }
3498    
3499 nigel 3 /* Handle matching via the POSIX interface, which does not
3500 nigel 63 support timing or playing with the match limit or callout data. */
3501 nigel 3
3502 nigel 37 #if !defined NOPOSIX
3503 nigel 3 if (posix || do_posix)
3504     {
3505     int rc;
3506     int eflags = 0;
3507 nigel 63 regmatch_t *pmatch = NULL;
3508     if (use_size_offsets > 0)
3509 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3510 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3511     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3512 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3513 nigel 3
3514 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3515 nigel 3
3516     if (rc != 0)
3517     {
3518 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3519 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3520     }
3521 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3522     != 0)
3523     {
3524     fprintf(outfile, "Matched with REG_NOSUB\n");
3525     }
3526 nigel 3 else
3527     {
3528 nigel 7 size_t i;
3529 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
3530 nigel 3 {
3531     if (pmatch[i].rm_so >= 0)
3532     {
3533 nigel 23 fprintf(outfile, "%2d: ", (int)i);
3534 ph10 808 PCHARSV(dbuffer + pmatch[i].rm_so,
3535 nigel 63 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3536 nigel 3 fprintf(outfile, "\n");
3537 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3538 nigel 35 {
3539 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
3540 ph10 808 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3541 nigel 63 outfile);
3542 nigel 35 fprintf(outfile, "\n");
3543     }
3544 nigel 3 }
3545     }
3546     }
3547 nigel 53 free(pmatch);
3548 ph10 808 goto NEXT_DATA;
3549 nigel 3 }
3550    
3551 ph10 808 #endif /* !defined NOPOSIX */
3552    
3553 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
3554 nigel 3
3555 ph10 808 #ifdef SUPPORT_PCRE16
3556 ph10 813 if (use_pcre16)
3557 ph10 808 {
3558     len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3559 ph10 810 if (len < 0)
3560     {
3561     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3562 ph10 813 "converted to UTF-16\n");
3563     goto NEXT_DATA;
3564     }
3565 ph10 808 bptr = (pcre_uint8 *)buffer16;
3566 ph10 813 }
3567 ph10 808 #endif
3568 nigel 37
3569 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
3570 nigel 3 {
3571 ph10 512 markptr = NULL;
3572    
3573 nigel 93 if (timeitm > 0)
3574 nigel 3 {
3575     register int i;
3576     clock_t time_taken;
3577     clock_t start_time = clock();
3578 nigel 77
3579 nigel 79 #if !defined NODFA
3580 nigel 77 if (all_use_dfa || use_dfa)
3581     {
3582     int workspace[1000];
3583 nigel 93 for (i = 0; i < timeitm; i++)
3584 ph10 815 {
3585 ph10 814 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3586     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3587     (sizeof(workspace)/sizeof(int)));
3588 ph10 815 }
3589 nigel 77 }
3590     else
3591 nigel 79 #endif
3592 nigel 77
3593 nigel 93 for (i = 0; i < timeitm; i++)
3594 ph10 808 {
3595 ph10 815 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3596 ph10 814 (options | g_notempty), use_offsets, use_size_offsets);
3597 ph10 808 }
3598 nigel 3 time_taken = clock() - start_time;
3599 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
3600     (((double)time_taken * 1000.0) / (double)timeitm) /
3601 nigel 63 (double)CLOCKS_PER_SEC);
3602 nigel 3 }
3603    
3604 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
3605 nigel 87 varying limits in order to find the minimum value for the match limit and
3606 ph10 667 for the recursion limit. The match limits are relevant only to the normal
3607     running of pcre_exec(), so disable the JIT optimization. This makes it
3608     possible to run the same set of tests with and without JIT externally
3609     requested. */
3610 nigel 63
3611     if (find_match_limit)
3612     {
3613     if (extra == NULL)
3614     {
3615 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3616 nigel 63 extra->flags = 0;
3617     }
3618 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3619 ph10 691
3620 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
3621 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
3622     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3623     PCRE_ERROR_MATCHLIMIT, "match()");
3624 nigel 63
3625 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
3626     options|g_notempty, use_offsets, use_size_offsets,
3627     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3628     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3629 nigel 63 }
3630    
3631     /* If callout_data is set, use the interface with additional data */
3632    
3633     else if (callout_data_set)
3634     {
3635     if (extra == NULL)
3636     {
3637 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3638 nigel 63 extra->flags = 0;
3639     }
3640     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3641 nigel 71 extra->callout_data = &callout_data;
3642 ph10 808 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3643 nigel 63 options | g_notempty, use_offsets, use_size_offsets);
3644     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3645     }
3646    
3647     /* The normal case is just to do the match once, with the default
3648     value of match_limit. */
3649    
3650 nigel 79 #if !defined NODFA
3651 nigel 77 else if (all_use_dfa || use_dfa)
3652     {
3653     int workspace[1000];
3654 ph10 814 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3655     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3656     (sizeof(workspace)/sizeof(int)));
3657 nigel 77 if (count == 0)
3658     {
3659     fprintf(outfile, "Matched, but too many subsidiary matches\n");
3660     count = use_size_offsets/2;
3661     }
3662     }
3663 nigel 79 #endif
3664 nigel 77
3665 nigel 75 else
3666     {
3667 ph10 808 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3668     options | g_notempty, use_offsets, use_size_offsets);
3669 nigel 77 if (count == 0)
3670     {
3671     fprintf(outfile, "Matched, but too many substrings\n");
3672     count = use_size_offsets/3;
3673     }
3674 nigel 75 }
3675 nigel 3
3676 nigel 39 /* Matched */
3677    
3678 nigel 3 if (count >= 0)
3679     {
3680 nigel 93 int i, maxcount;
3681    
3682     #if !defined NODFA
3683     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3684     #endif
3685     maxcount = use_size_offsets/3;
3686    
3687     /* This is a check against a lunatic return value. */
3688    
3689     if (count > maxcount)
3690     {
3691     fprintf(outfile,
3692     "** PCRE error: returned count %d is too big for offset size %d\n",
3693     count, use_size_offsets);
3694     count = use_size_offsets/3;
3695     if (do_g || do_G)
3696     {
3697     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3698     do_g = do_G = FALSE; /* Break g/G loop */
3699     }
3700     }
3701 ph10 654
3702 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
3703     unset ones at the end. */
3704 ph10 654
3705 ph10 626 if (do_allcaps)
3706     {
3707     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3708 ph10 654 count++; /* Allow for full match */
3709     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3710     }
3711 nigel 93
3712 ph10 626 /* Output the captured substrings */
3713 ph10 654
3714 nigel 29 for (i = 0; i < count * 2; i += 2)
3715 nigel 3 {
3716 nigel 57 if (use_offsets[i] < 0)
3717 ph10 654 {
3718 ph10 626 if (use_offsets[i] != -1)
3719     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3720 ph10 654 use_offsets[i], i);
3721 ph10 626 if (use_offsets[i+1] != -1)
3722     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3723 ph10 654 use_offsets[i+1], i+1);
3724 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
3725 ph10 654 }
3726 nigel 3 else
3727     {
3728     fprintf(outfile, "%2d: ", i/2);
3729 ph10 808 PCHARSV(bptr + use_offsets[i],
3730 nigel 63 use_offsets[i+1] - use_offsets[i], outfile);
3731 nigel 3 fprintf(outfile, "\n");
3732 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3733 nigel 35 {
3734 ph10 616 fprintf(outfile, "%2d+ ", i/2);
3735 ph10 808 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3736 ph10 616 outfile);
3737     fprintf(outfile, "\n");
3738 nigel 35 }
3739 nigel 3 }
3740     }
3741 ph10 512
3742 ph10 814 if (markptr != NULL)
3743     {
3744     fprintf(outfile, "MK: ");
3745 zherczeg 817 PCHARSV(markptr, -1, outfile);
3746 ph10 814 fprintf(outfile, "\n");
3747     }
3748 nigel 29
3749     for (i = 0; i < 32; i++)
3750     {
3751     if ((copystrings & (1 << i)) != 0)
3752     {
3753 ph10 815 int rc;
3754 nigel 91 char copybuffer[256];
3755 ph10 815 PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3756     copybuffer, sizeof(copybuffer));
3757 nigel 29 if (rc < 0)
3758     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3759     else
3760 ph10 815 {
3761     fprintf(outfile, "%2dC ", i);
3762     PCHARSV(copybuffer, rc, outfile);
3763     fprintf(outfile, " (%d)\n", rc);
3764     }
3765 nigel 29 }
3766     }
3767    
3768 nigel 91 for (copynamesptr = copynames;
3769 zherczeg 816 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3770     use_pcre16?
3771     (*(PCRE_SCHAR16*)copynamesptr) != 0 : *copynamesptr != 0;
3772     #else
3773 nigel 91 *copynamesptr != 0;
3774 zherczeg 816 #endif
3775 zherczeg 817 copynamesptr += (int)(STRLEN(copynamesptr) + 1) * CHAR_SIZE)
3776 nigel 91 {
3777 ph10 815 int rc;
3778 nigel 91 char copybuffer[256];
3779 ph10 815 PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
3780     copynamesptr, copybuffer, sizeof(copybuffer));
3781 nigel 91 if (rc < 0)
3782 zherczeg 817 {
3783     fprintf(outfile, "copy substring ");
3784     PCHARSV(copynamesptr, -1, outfile);
3785     fprintf(outfile, " failed %d\n", rc);
3786     }
3787 nigel 91 else
3788 ph10 815 {
3789     fprintf(outfile, " C ");
3790     PCHARSV(copybuffer, rc, outfile);
3791 zherczeg 816 fprintf(outfile, " (%d) ", rc);
3792 zherczeg 817 PCHARSV(copynamesptr, -1, outfile);
3793 zherczeg 816 putc('\n', outfile);
3794 ph10 815 }
3795 nigel 91 }
3796    
3797 nigel 29 for (i = 0; i < 32; i++)
3798     {
3799     if ((getstrings & (1 << i)) != 0)
3800     {
3801 ph10 815 int rc;
3802 nigel 29 const char *substring;
3803 ph10 815 PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
3804 nigel 29 if (rc < 0)
3805     fprintf(outfile, "get substring %d failed %d\n", i, rc);
3806     else
3807