/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 847 - (hide annotations) (download)
Tue Jan 3 17:49:03 2012 UTC (2 years, 8 months ago) by zherczeg
File MIME type: text/plain
File size: 128801 byte(s)
fix signed/unsigned half load mismatches and JIT compiler update
1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 836 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39 ph10 836 /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40     libraries in a single program. This is different from the modules such as
41     pcre_compile.c in the library itself, which are compiled separately for each
42     mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43     (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44     compiled only once. Therefore, it must not make use of any of the macros from
45     pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46     however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47     only supported library functions. */
48 nigel 75
49 ph10 836
50 ph10 200 #ifdef HAVE_CONFIG_H
51 ph10 236 #include "config.h"
52 ph10 200 #endif
53 ph10 199
54 nigel 3 #include <ctype.h>
55     #include <stdio.h>
56     #include <string.h>
57     #include <stdlib.h>
58     #include <time.h>
59 nigel 25 #include <locale.h>
60 nigel 75 #include <errno.h>
61 nigel 3
62 ph10 287 #ifdef SUPPORT_LIBREADLINE
63 ph10 343 #ifdef HAVE_UNISTD_H
64 ph10 287 #include <unistd.h>
65 ph10 343 #endif
66 ph10 287 #include <readline/readline.h>
67     #include <readline/history.h>
68     #endif
69 nigel 93
70 ph10 287
71 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
72     input and output without "b"; then I was told that "b" was needed in some
73     environments, so it was added for release 5.0 to both the input and output. (It
74     makes no difference on Unix-like systems.) Later I was told that it is wrong
75     for the input on Windows. I've now abstracted the modes into two macros that
76     are set here, to make it easier to fiddle with them, and removed "b" from the
77     input mode under Windows. */
78    
79     #if defined(_WIN32) || defined(WIN32)
80     #include <io.h> /* For _setmode() */
81     #include <fcntl.h> /* For _O_BINARY */
82     #define INPUT_MODE "r"
83     #define OUTPUT_MODE "wb"
84    
85 ph10 411 #ifndef isatty
86     #define isatty _isatty /* This is what Windows calls them, I'm told, */
87     #endif /* though in some environments they seem to */
88     /* be already defined, hence the #ifndefs. */
89     #ifndef fileno
90 ph10 343 #define fileno _fileno
91 ph10 411 #endif
92 ph10 343
93 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95     #ifdef __BORLANDC__
96     #define _setmode(handle, mode) setmode(handle, mode)
97     #endif
98    
99     /* Not Windows */
100    
101 nigel 93 #else
102     #include <sys/time.h> /* These two includes are needed */
103     #include <sys/resource.h> /* for setrlimit(). */
104     #define INPUT_MODE "rb"
105     #define OUTPUT_MODE "wb"
106 nigel 91 #endif
107    
108 nigel 93
109 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
110     displaying the results of pcre_study() and we also need to know about the
111     internal macros, structures, and other internal data values; pcretest has
112     "inside information" compared to a program that strictly follows the PCRE API.
113 nigel 37
114 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
115     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
116     appropriately for an application, not for building PCRE. */
117 nigel 77
118 ph10 145 #include "pcre.h"
119 ph10 836
120     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121     /* Configure internal macros to 16 bit mode. */
122     #define COMPILE_PCRE16
123     #endif
124    
125 nigel 77 #include "pcre_internal.h"
126    
127 ph10 836 /* The pcre_printint() function, which prints the internal form of a compiled
128     regex, is held in a separate file so that (a) it can be compiled in either
129     8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130     when that is compiled in debug mode. */
131    
132     #ifdef SUPPORT_PCRE8
133     void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134     #endif
135     #ifdef SUPPORT_PCRE16
136     void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137     #endif
138    
139 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
140     to keep two copies, we include the source file here, changing the names of the
141     external symbols to prevent clashes. */
142 nigel 77
143 ph10 836 #define PCRE_INCLUDED
144     #undef PRIV
145     #define PRIV(name) name
146 nigel 85
147     #include "pcre_tables.c"
148    
149 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
150 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
151 ph10 836 the same as in the printint.src file. We use it here in cases when the locale
152     has not been explicitly changed, so as to get consistent output from systems
153     that differ in their output from isprint() even in the "C" locale. */
154 nigel 93
/* PRINTABLE(c) is true for character codes that are output as-is rather than
as hex: 64..254 when EBCDIC is defined, 32..126 otherwise. */

#if defined EBCDIC
#define PRINTABLE(c) (64 <= (c) && (c) < 255)
#else
#define PRINTABLE(c) (32 <= (c) && (c) < 127)
#endif

/* PRINTOK(c) defers to isprint() once locale_set is non-zero; otherwise it
uses the fixed PRINTABLE() range. */

#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163     /* Posix support is disabled in 16 bit only mode. */
164     #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165     #define NOPOSIX
166     #endif
167    
168 nigel 37 /* It is possible to compile this test program without including support for
169     testing the POSIX interface, though this is not available via the standard
170     Makefile. */
171    
172     #if !defined NOPOSIX
173 nigel 3 #include "pcreposix.h"
174 nigel 37 #endif
175 nigel 3
176 ph10 836 /* It is also possible, originally for the benefit of a version that was
177     imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178     NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
179     automatically cut out the UTF support if PCRE is built without it. */
180 nigel 79
181 ph10 836 #ifndef SUPPORT_UTF
182     #ifndef NOUTF
183     #define NOUTF
184 ph10 107 #endif
185     #endif
186 nigel 79
187 ph10 836 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188     for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189     only from one place and is handled differently). I couldn't dream up any way of
190     using a single macro to do this in a generic way, because of the many different
191     argument requirements. We know that at least one of SUPPORT_PCRE8 and
192     SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193     use these in the definitions of generic macros.
194 ph10 107
195 ph10 836 **** Special note about the PCHARSxxx macros: the address of the string to be
196     printed is always given as two arguments: a base address followed by an offset.
197     The base address is cast to the correct data size for 8 or 16 bit data; the
198     offset is in units of this size. If the string were given as base+offset in one
199     argument, the casting might be incorrectly applied. */
200    
201     #ifdef SUPPORT_PCRE8
202    
/* ----- Wrappers for the 8-bit library -----

Each macro below forwards to one 8-bit function, casting subject and name
strings to char *. Macros that yield a value assign it to their first argument
(lv, p, re, rc, count, n or extra), so every macro is used as a statement. */

/* Call pchars() on the string at (pcre_uint8 *)p + offset and store its return
value in lv. The cast is applied to p before the offset is added, so the offset
is counted in 8-bit units. */

#define PCHARS8(lv, p, offset, len, f) \
  lv = pchars((pcre_uint8 *)(p) + offset, len, f)

/* As PCHARS8, but the return value of pchars() is discarded. */

#define PCHARSV8(p, offset, len, f) \
  (void)pchars((pcre_uint8 *)(p) + offset, len, f)

/* Call read_capture_name8() and store its result back in p. The cn16 argument
is ignored here; it exists so that the 8-bit and 16-bit macros take the same
argument list. */

#define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
  p = read_capture_name8(p, cn8, re)

/* Assign callout to pcre_callout. */

#define SET_PCRE_CALLOUT8(callout) \
  pcre_callout = callout

/* strlen() of p viewed as a char string, converted to int. */

#define STRLEN8(p) ((int)strlen((char *)p))


/* re = pcre_compile(...) with the pattern cast to char *. */

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

/* rc = pcre_copy_named_substring(...); size is passed through unchanged. */

#define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
    (char *)namesptr, cbuffer, size)

/* rc = pcre_copy_substring(...); size is passed through unchanged. */

#define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)

/* count = pcre_dfa_exec(...). */

#define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace)

/* count = pcre_exec(...). */

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

/* The three macros below forward to the matching pcre_free_xxx() function and
produce no value. */

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_FREE_SUBSTRING8(substring) \
  pcre_free_substring(substring)

#define PCRE_FREE_SUBSTRING_LIST8(listptr) \
  pcre_free_substring_list(listptr)

/* rc = pcre_get_named_substring(...). */

#define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
    (char *)getnamesptr, subsptr)

/* n = pcre_get_stringnumber(re, name). The second parameter used to be spelt
"rc" and was never referenced, so the expansion silently picked up whatever
variable called "re" happened to be in scope at the point of use. It is now
named "re" and is the compiled pattern that gets passed on; a call that already
passes "re" expands exactly as before. */

#define PCRE_GET_STRINGNUMBER8(n, re, ptr) \
  n = pcre_get_stringnumber(re, (char *)ptr)

/* rc = pcre_get_substring(...). */

#define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)

/* rc = pcre_get_substring_list(...). */

#define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
  rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)

/* rc = pcre_pattern_to_host_byte_order(...). */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
  rc = pcre_pattern_to_host_byte_order(re, extra, tables)

/* Forward to pcre_printint(); no value is produced. */

#define PCRE_PRINTINT8(re, outfile, debug_lengths) \
  pcre_printint(re, outfile, debug_lengths)

/* extra = pcre_study(...). */

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)
270    
271     #endif /* SUPPORT_PCRE8 */
272    
273     /* -----------------------------------------------------------*/
274    
275     #ifdef SUPPORT_PCRE16
276    
/* ----- Wrappers for the 16-bit library -----

Each macro below forwards to one 16-bit function, casting subject and name
strings to PCRE_SPTR16. Macros that yield a value assign it to their first
argument (lv, p, re, rc, count, n or extra), so every macro is used as a
statement. */

/* Call pchars16() on the string at (PCRE_SPTR16)p + offset and store its
return value in lv. The cast is applied to p before the offset is added, so the
offset is counted in 16-bit units. */

#define PCHARS16(lv, p, offset, len, f) \
  lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* As PCHARS16, but the return value of pchars16() is discarded. */

#define PCHARSV16(p, offset, len, f) \
  (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)

/* Call read_capture_name16() and store its result back in p. The cn8 argument
is ignored here; it exists so that the 8-bit and 16-bit macros take the same
argument list. */

#define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
  p = read_capture_name16(p, cn16, re)

/* strlen16() of p viewed as a PCRE_SPTR16, converted to int. */

#define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))

/* Assign callout to pcre16_callout. */

#define SET_PCRE_CALLOUT16(callout) \
  pcre16_callout = callout


/* re = pcre16_compile(...) with the pattern cast to PCRE_SPTR16. */

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)

/* rc = pcre16_copy_named_substring(...). The size argument is halved before it
is passed on (presumably because callers give the buffer size in bytes while
the 16-bit function counts 16-bit units - TODO confirm). It is parenthesised so
that an expression such as "a + b" is halved as a whole. */

#define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, (size)/2)

/* rc = pcre16_copy_substring(...). The size argument is halved in the same way
as for PCRE_COPY_NAMED_SUBSTRING16, and parenthesised for the same reason. */

#define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
  rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SCHAR16 *)cbuffer, (size)/2)

/* count = pcre16_dfa_exec(...). */

#define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets, workspace, size_workspace)

/* count = pcre16_exec(...). */

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets)

/* The three macros below forward to the matching pcre16_free_xxx() function
and produce no value. */

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study(extra)

#define PCRE_FREE_SUBSTRING16(substring) \
  pcre16_free_substring((PCRE_SPTR16)substring)

#define PCRE_FREE_SUBSTRING_LIST16(listptr) \
  pcre16_free_substring_list((PCRE_SPTR16 *)listptr)

/* rc = pcre16_get_named_substring(...). The result pointer goes through
(void *) before being cast to PCRE_SPTR16 *. */

#define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)

/* n = pcre16_get_stringnumber(re, name). The second parameter used to be spelt
"rc" and was never referenced, so the expansion silently picked up whatever
variable called "re" happened to be in scope at the point of use. It is now
named "re" and is the compiled pattern that gets passed on; a call that already
passes "re" expands exactly as before. */

#define PCRE_GET_STRINGNUMBER16(n, re, ptr) \
  n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)

/* rc = pcre16_get_substring(...). */

#define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
  rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
    (PCRE_SPTR16 *)(void*)subsptr)

/* rc = pcre16_get_substring_list(...). */

#define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
  rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
    (PCRE_SPTR16 **)(void*)listptr)

/* rc = pcre16_pattern_to_host_byte_order(...). */

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
  rc = pcre16_pattern_to_host_byte_order(re, extra, tables)

/* Forward to pcre16_printint(); no value is produced. */

#define PCRE_PRINTINT16(re, outfile, debug_lengths) \
  pcre16_printint(re, outfile, debug_lengths)

/* extra = pcre16_study(...). */

#define PCRE_STUDY16(extra, re, options, error) \
  extra = pcre16_study(re, options, error)
347    
348     #endif /* SUPPORT_PCRE16 */
349    
350    
351     /* ----- Both modes are supported; a runtime test is needed, except for
352     pcre_config(), and the JIT stack functions, when it doesn't matter which
353     version is called. ----- */
354    
#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16

/* In this branch each statement macro tests use_pcre16 at run time and invokes
either the ...16 or the ...8 macro of the same name with the same arguments.
The test is wrapped in do { ... } while (0) with braced branches so that an
invocation followed by a semicolon is always exactly one statement, whatever
if/else surrounds it. */

#define CHAR_SIZE (use_pcre16? 2:1)

#define PCHARS(lv, p, offset, len, f) \
  do { \
    if (use_pcre16) { PCHARS16(lv, p, offset, len, f); } \
    else { PCHARS8(lv, p, offset, len, f); } \
  } while (0)

#define PCHARSV(p, offset, len, f) \
  do { \
    if (use_pcre16) { PCHARSV16(p, offset, len, f); } \
    else { PCHARSV8(p, offset, len, f); } \
  } while (0)

#define READ_CAPTURE_NAME(p, cn8, cn16, re) \
  do { \
    if (use_pcre16) { READ_CAPTURE_NAME16(p, cn8, cn16, re); } \
    else { READ_CAPTURE_NAME8(p, cn8, cn16, re); } \
  } while (0)

#define SET_PCRE_CALLOUT(callout) \
  do { \
    if (use_pcre16) { SET_PCRE_CALLOUT16(callout); } \
    else { SET_PCRE_CALLOUT8(callout); } \
  } while (0)

#define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))

#define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do { \
    if (use_pcre16) \
      { PCRE_COMPILE16(re, pat, options, error, erroffset, tables); } \
    else \
      { PCRE_COMPILE8(re, pat, options, error, erroffset, tables); } \
  } while (0)

#define PCRE_CONFIG pcre_config

#define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    namesptr, cbuffer, size) \
  do { \
    if (use_pcre16) \
      { PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
          namesptr, cbuffer, size); } \
    else \
      { PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
          namesptr, cbuffer, size); } \
  } while (0)

#define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
  do { \
    if (use_pcre16) \
      { PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); } \
    else \
      { PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size); } \
  } while (0)

#define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets, workspace, size_workspace) \
  do { \
    if (use_pcre16) \
      { PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets, workspace, size_workspace); } \
    else \
      { PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets, workspace, size_workspace); } \
  } while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do { \
    if (use_pcre16) \
      { PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets); } \
    else \
      { PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
          offsets, size_offsets); } \
  } while (0)

#define PCRE_FREE_STUDY(extra) \
  do { \
    if (use_pcre16) { PCRE_FREE_STUDY16(extra); } \
    else { PCRE_FREE_STUDY8(extra); } \
  } while (0)

#define PCRE_FREE_SUBSTRING(substring) \
  do { \
    if (use_pcre16) { PCRE_FREE_SUBSTRING16(substring); } \
    else { PCRE_FREE_SUBSTRING8(substring); } \
  } while (0)

#define PCRE_FREE_SUBSTRING_LIST(listptr) \
  do { \
    if (use_pcre16) { PCRE_FREE_SUBSTRING_LIST16(listptr); } \
    else { PCRE_FREE_SUBSTRING_LIST8(listptr); } \
  } while (0)

#define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
    getnamesptr, subsptr) \
  do { \
    if (use_pcre16) \
      { PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
          getnamesptr, subsptr); } \
    else \
      { PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
          getnamesptr, subsptr); } \
  } while (0)

/* The second argument is forwarded unchanged as the second argument of the
8-bit or 16-bit macro. */

#define PCRE_GET_STRINGNUMBER(n, re, ptr) \
  do { \
    if (use_pcre16) { PCRE_GET_STRINGNUMBER16(n, re, ptr); } \
    else { PCRE_GET_STRINGNUMBER8(n, re, ptr); } \
  } while (0)

#define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
  do { \
    if (use_pcre16) \
      { PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); } \
    else \
      { PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr); } \
  } while (0)

#define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
  do { \
    if (use_pcre16) \
      { PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); } \
    else \
      { PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr); } \
  } while (0)

#define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE pcre_jit_stack_free

#define PCRE_MAKETABLES \
  (use_pcre16? pcre16_maketables() : pcre_maketables())

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
  do { \
    if (use_pcre16) \
      { PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); } \
    else \
      { PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables); } \
  } while (0)

#define PCRE_PRINTINT(re, outfile, debug_lengths) \
  do { \
    if (use_pcre16) { PCRE_PRINTINT16(re, outfile, debug_lengths); } \
    else { PCRE_PRINTINT8(re, outfile, debug_lengths); } \
  } while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do { \
    if (use_pcre16) { PCRE_STUDY16(extra, re, options, error); } \
    else { PCRE_STUDY8(extra, re, options, error); } \
  } while (0)

/* ----- Only 8-bit mode is supported ----- */

/* Each generic name is a plain alias for its ...8 macro or pcre_xxx function;
no run-time test is involved. */

#elif defined SUPPORT_PCRE8
#define CHAR_SIZE                 1
#define PCHARS                    PCHARS8
#define PCHARSV                   PCHARSV8
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
#define STRLEN                    STRLEN8
#define PCRE_ASSIGN_JIT_STACK     pcre_assign_jit_stack
#define PCRE_COMPILE              PCRE_COMPILE8
#define PCRE_CONFIG               pcre_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
#define PCRE_EXEC                 PCRE_EXEC8
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
#define PCRE_JIT_STACK_ALLOC      pcre_jit_stack_alloc
#define PCRE_JIT_STACK_FREE       pcre_jit_stack_free
#define PCRE_MAKETABLES           pcre_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
#define PCRE_PRINTINT             PCRE_PRINTINT8
#define PCRE_STUDY                PCRE_STUDY8

/* ----- Only 16-bit mode is supported ----- */

/* Each generic name is a plain alias for its ...16 macro or pcre16_xxx
function; no run-time test is involved. */

#else
#define CHAR_SIZE                 2
#define PCHARS                    PCHARS16
#define PCHARSV                   PCHARSV16
#define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
#define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
#define STRLEN                    STRLEN16
#define PCRE_ASSIGN_JIT_STACK     pcre16_assign_jit_stack
#define PCRE_COMPILE              PCRE_COMPILE16
#define PCRE_CONFIG               pcre16_config
#define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
#define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
#define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
#define PCRE_EXEC                 PCRE_EXEC16
#define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
#define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
#define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
#define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
#define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
#define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
#define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
#define PCRE_JIT_STACK_ALLOC      pcre16_jit_stack_alloc
#define PCRE_JIT_STACK_FREE       pcre16_jit_stack_free
#define PCRE_MAKETABLES           pcre16_maketables()
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#define PCRE_PRINTINT             PCRE_PRINTINT16
#define PCRE_STUDY                PCRE_STUDY16
#endif
557    
558     /* ----- End of mode-specific function call macros ----- */
559    
560    
561 nigel 85 /* Other parameters */
562    
/* Supply CLOCKS_PER_SEC when it has not been defined already: use CLK_TCK if
that exists, otherwise fall back to 100. */

#if !defined CLOCKS_PER_SEC
#if defined CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* Default number of repetitions of the timing loop. */

#define LOOPREPEAT 500000
574 nigel 3
575 nigel 85 /* Static variables */
576    
577 nigel 3 static FILE *outfile;
578     static int log_store = 0;
579 nigel 63 static int callout_count;
580     static int callout_extra;
581     static int callout_fail_count;
582     static int callout_fail_id;
583 ph10 210 static int debug_lengths;
584 nigel 63 static int first_callout;
585 nigel 93 static int locale_set = 0;
586 nigel 73 static int show_malloc;
587 ph10 836 static int use_utf;
588 nigel 43 static size_t gotten_store;
589 ph10 836 static size_t first_gotten_store = 0;
590 ph10 645 static const unsigned char *last_callout_mark = NULL;
591 nigel 3
592 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
593    
594     static int buffer_size = 50000;
595 ph10 836 static pcre_uint8 *buffer = NULL;
596     static pcre_uint8 *dbuffer = NULL;
597     static pcre_uint8 *pbuffer = NULL;
598 nigel 3
599 ph10 836 /* Another buffer is needed for translation to 16-bit character strings. It
600     will be obtained and extended as required. */
601    
#ifdef SUPPORT_PCRE16

/* The 16-bit translation buffer starts out unallocated with a size of zero. */

static pcre_uint16 *buffer16 = NULL;
static int buffer16_size = 0;

#ifdef SUPPORT_PCRE8

/* When both libraries are supported, the internal macros are set up for the
8-bit world, but the table of operator lengths below is wanted for 16-bit
compiled patterns (it is used to swap bytes in a pattern when testing
saving/reloading). The table comes from the OP_LENGTHS macro, so LINK_SIZE and
IMM2_SIZE are redefined in 16-bit terms before that macro is expanded. As a
safety check, COMPILE_PCRE16 must *not* be set at this point. */

#if defined COMPILE_PCRE16
#error COMPILE_PCRE16 must not be set when compiling pcretest.c
#endif

/* LINK_SIZE: 2 becomes 1; 3 or 4 becomes 2; anything else is rejected. */

#if LINK_SIZE == 3 || LINK_SIZE == 4
#undef LINK_SIZE
#define LINK_SIZE 2
#elif LINK_SIZE == 2
#undef LINK_SIZE
#define LINK_SIZE 1
#else
#error LINK_SIZE must be either 2, 3, or 4
#endif

#undef IMM2_SIZE
#define IMM2_SIZE 1

#endif /* SUPPORT_PCRE8 */

/* Operator lengths, built from OP_LENGTHS with whatever LINK_SIZE and
IMM2_SIZE are in force at this point. */

static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };

#endif /* SUPPORT_PCRE16 */
635    
636     /* If we have 8-bit support, default use_pcre16 to false; if there is also
637     16-bit support, it can be changed by an option. If there is no 8-bit support,
638     there must be 16-bit support, so default it to 1. */
639    
/* use_pcre16 starts as 1 when there is no 8-bit support and as 0 otherwise. */

#ifndef SUPPORT_PCRE8
static int use_pcre16 = 1;
#else
static int use_pcre16 = 0;
#endif
645    
646 ph10 598 /* Textual explanations for runtime error codes */
647 nigel 75
/* Message table with 29 entries. A NULL entry marks a slot that has no text of
its own, for the reason given beside it. NOTE(review): entry n presumably
belongs to library error code -n - confirm against the code that indexes this
table and against pcre.h. */

static const char *errtexts[] = {
  /*  0 */ NULL,  /* 0 is no error */
  /*  1 */ NULL,  /* NOMATCH is handled specially */
  /*  2 */ "NULL argument passed",
  /*  3 */ "bad option value",
  /*  4 */ "magic number missing",
  /*  5 */ "unknown opcode - pattern overwritten?",
  /*  6 */ "no more memory",
  /*  7 */ NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */ "match limit exceeded",
  /*  9 */ "callout error code",
  /* 10 */ NULL,  /* BADUTF8/16 is handled specially */
  /* 11 */ NULL,  /* BADUTF8/16 offset is handled specially */
  /* 12 */ NULL,  /* PARTIAL is handled specially */
  /* 13 */ "not used - internal error",
  /* 14 */ "internal error - pattern overwritten?",
  /* 15 */ "bad count value",
  /* 16 */ "item unsupported for DFA matching",
  /* 17 */ "backreference condition or recursion test not supported for DFA matching",
  /* 18 */ "match limit not supported for DFA matching",
  /* 19 */ "workspace size exceeded in DFA matching",
  /* 20 */ "too much recursion for DFA matching",
  /* 21 */ "recursion limit exceeded",
  /* 22 */ "not used - internal error",
  /* 23 */ "invalid combination of newline options",
  /* 24 */ "bad offset value",
  /* 25 */ NULL,  /* SHORTUTF8/16 is handled specially */
  /* 26 */ "nested recursion at the same subject position",
  /* 27 */ "JIT stack limit reached",
  /* 28 */ "pattern compiled in wrong mode: 8-bit/16-bit error"
};
679    
680 ph10 654
681 ph10 541 /*************************************************
682     * Alternate character tables *
683     *************************************************/
684 nigel 49
685 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
686     using the default tables of the library. However, the T option can be used to
687     select alternate sets of tables, for different kinds of testing. Note also that
688 ph10 541 the L (locale) option also adjusts the tables. */
689    
690 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
691 ph10 541 only ASCII characters. */
692    
693 ph10 836 static const pcre_uint8 tables0[] = {
694 ph10 541
695     /* This table is a lower casing table. */
696    
697     0, 1, 2, 3, 4, 5, 6, 7,
698     8, 9, 10, 11, 12, 13, 14, 15,
699     16, 17, 18, 19, 20, 21, 22, 23,
700     24, 25, 26, 27, 28, 29, 30, 31,
701     32, 33, 34, 35, 36, 37, 38, 39,
702     40, 41, 42, 43, 44, 45, 46, 47,
703     48, 49, 50, 51, 52, 53, 54, 55,
704     56, 57, 58, 59, 60, 61, 62, 63,
705     64, 97, 98, 99,100,101,102,103,
706     104,105,106,107,108,109,110,111,
707     112,113,114,115,116,117,118,119,
708     120,121,122, 91, 92, 93, 94, 95,
709     96, 97, 98, 99,100,101,102,103,
710     104,105,106,107,108,109,110,111,
711     112,113,114,115,116,117,118,119,
712     120,121,122,123,124,125,126,127,
713     128,129,130,131,132,133,134,135,
714     136,137,138,139,140,141,142,143,
715     144,145,146,147,148,149,150,151,
716     152,153,154,155,156,157,158,159,
717     160,161,162,163,164,165,166,167,
718     168,169,170,171,172,173,174,175,
719     176,177,178,179,180,181,182,183,
720     184,185,186,187,188,189,190,191,
721     192,193,194,195,196,197,198,199,
722     200,201,202,203,204,205,206,207,
723     208,209,210,211,212,213,214,215,
724     216,217,218,219,220,221,222,223,
725     224,225,226,227,228,229,230,231,
726     232,233,234,235,236,237,238,239,
727     240,241,242,243,244,245,246,247,
728     248,249,250,251,252,253,254,255,
729    
730     /* This table is a case flipping table. */
731    
732     0, 1, 2, 3, 4, 5, 6, 7,
733     8, 9, 10, 11, 12, 13, 14, 15,
734     16, 17, 18, 19, 20, 21, 22, 23,
735     24, 25, 26, 27, 28, 29, 30, 31,
736     32, 33, 34, 35, 36, 37, 38, 39,
737     40, 41, 42, 43, 44, 45, 46, 47,
738     48, 49, 50, 51, 52, 53, 54, 55,
739     56, 57, 58, 59, 60, 61, 62, 63,
740     64, 97, 98, 99,100,101,102,103,
741     104,105,106,107,108,109,110,111,
742     112,113,114,115,116,117,118,119,
743     120,121,122, 91, 92, 93, 94, 95,
744     96, 65, 66, 67, 68, 69, 70, 71,
745     72, 73, 74, 75, 76, 77, 78, 79,
746     80, 81, 82, 83, 84, 85, 86, 87,
747     88, 89, 90,123,124,125,126,127,
748     128,129,130,131,132,133,134,135,
749     136,137,138,139,140,141,142,143,
750     144,145,146,147,148,149,150,151,
751     152,153,154,155,156,157,158,159,
752     160,161,162,163,164,165,166,167,
753     168,169,170,171,172,173,174,175,
754     176,177,178,179,180,181,182,183,
755     184,185,186,187,188,189,190,191,
756     192,193,194,195,196,197,198,199,
757     200,201,202,203,204,205,206,207,
758     208,209,210,211,212,213,214,215,
759     216,217,218,219,220,221,222,223,
760     224,225,226,227,228,229,230,231,
761     232,233,234,235,236,237,238,239,
762     240,241,242,243,244,245,246,247,
763     248,249,250,251,252,253,254,255,
764    
765     /* This table contains bit maps for various character classes. Each map is 32
766     bytes long and the bits run from the least significant end of each byte. The
767     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
768     graph, print, punct, and cntrl. Other classes are built from combinations. */
769    
770     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
771     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
772     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
773     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
774    
775     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
776     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
777     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
778     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
779    
780     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
781     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
782     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
783     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
784    
785     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
786     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
787     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
788     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
789    
790     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
791     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
792     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
793     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
794    
795     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
796     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
797     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
798     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
799    
800     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
801     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
802     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
803     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
804    
805     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
806     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
807     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
808     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
809    
810     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
811     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
812     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
813     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
814    
815     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
816     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
817     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819    
820     /* This table identifies various classes of character by individual bits:
821     0x01 white space character
822     0x02 letter
823     0x04 decimal digit
824     0x08 hexadecimal digit
825     0x10 alphanumeric or '_'
826     0x80 regular expression metacharacter or binary zero
827     */
828    
829     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
830     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
831     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
832     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
833     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
834     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
835     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
836     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
837     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
838     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
839     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
840     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
841     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
842     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
843     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
844     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
845     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
846     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
847     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
848     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
849     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
850     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
851     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
852     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
853     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
854     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
855     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
856     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
857     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
858     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
859     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
860     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
861    
862 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
863     be at least an approximation of ISO 8859. In particular, there are characters
864 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
865    
/* tables1 is one flat array of 1088 bytes (136 rows of 8) made of four
consecutive sections. The layout appears to mirror the default table that
precedes it in this file: a 256-entry lower-casing map, a 256-entry
case-flipping map, 320 bytes of character-class bit maps (ten maps of 32 bytes
each), and a 256-entry table of per-character type flags. */

866 ph10 836 static const pcre_uint8 tables1[] = {
/* Section 1 (entries 0-255): lower-casing map. Entry i holds the lower-case
form of character i. As well as A-Z, codes 192-214 and 216-222 are mapped to
224-246 and 248-254; 215 and 223 map to themselves. */
867 ph10 541 0,1,2,3,4,5,6,7,
868     8,9,10,11,12,13,14,15,
869     16,17,18,19,20,21,22,23,
870     24,25,26,27,28,29,30,31,
871     32,33,34,35,36,37,38,39,
872     40,41,42,43,44,45,46,47,
873     48,49,50,51,52,53,54,55,
874     56,57,58,59,60,61,62,63,
875     64,97,98,99,100,101,102,103,
876     104,105,106,107,108,109,110,111,
877     112,113,114,115,116,117,118,119,
878     120,121,122,91,92,93,94,95,
879     96,97,98,99,100,101,102,103,
880     104,105,106,107,108,109,110,111,
881     112,113,114,115,116,117,118,119,
882     120,121,122,123,124,125,126,127,
883     128,129,130,131,132,133,134,135,
884     136,137,138,139,140,141,142,143,
885     144,145,146,147,148,149,150,151,
886     152,153,154,155,156,157,158,159,
887     160,161,162,163,164,165,166,167,
888     168,169,170,171,172,173,174,175,
889     176,177,178,179,180,181,182,183,
890     184,185,186,187,188,189,190,191,
891     224,225,226,227,228,229,230,231,
892     232,233,234,235,236,237,238,239,
893     240,241,242,243,244,245,246,215,
894     248,249,250,251,252,253,254,223,
895     224,225,226,227,228,229,230,231,
896     232,233,234,235,236,237,238,239,
897     240,241,242,243,244,245,246,247,
898     248,249,250,251,252,253,254,255,
/* Section 2 (entries 0-255): case-flipping map. Upper-case codes map to their
lower-case partner and vice versa, including the 192-222 <-> 224-254 pairs;
215 and 223 map to themselves, while 247 and 255 map to 215 and 223. */
899     0,1,2,3,4,5,6,7,
900     8,9,10,11,12,13,14,15,
901     16,17,18,19,20,21,22,23,
902     24,25,26,27,28,29,30,31,
903     32,33,34,35,36,37,38,39,
904     40,41,42,43,44,45,46,47,
905     48,49,50,51,52,53,54,55,
906     56,57,58,59,60,61,62,63,
907     64,97,98,99,100,101,102,103,
908     104,105,106,107,108,109,110,111,
909     112,113,114,115,116,117,118,119,
910     120,121,122,91,92,93,94,95,
911     96,65,66,67,68,69,70,71,
912     72,73,74,75,76,77,78,79,
913     80,81,82,83,84,85,86,87,
914     88,89,90,123,124,125,126,127,
915     128,129,130,131,132,133,134,135,
916     136,137,138,139,140,141,142,143,
917     144,145,146,147,148,149,150,151,
918     152,153,154,155,156,157,158,159,
919     160,161,162,163,164,165,166,167,
920     168,169,170,171,172,173,174,175,
921     176,177,178,179,180,181,182,183,
922     184,185,186,187,188,189,190,191,
923     224,225,226,227,228,229,230,231,
924     232,233,234,235,236,237,238,239,
925     240,241,242,243,244,245,246,215,
926     248,249,250,251,252,253,254,223,
927     192,193,194,195,196,197,198,199,
928     200,201,202,203,204,205,206,207,
929     208,209,210,211,212,213,214,247,
930     216,217,218,219,220,221,222,255,
/* Section 3 (320 bytes): character-class bit maps, 32 bytes (4 rows) per
class. NOTE(review): presumably in the same class order as the preceding
table's comment (space, xdigit, digit, upper, lower, word, graph, print,
punct, cntrl) - confirm against the code that indexes these maps. */
931     0,62,0,0,1,0,0,0,
932     0,0,0,0,0,0,0,0,
933     32,0,0,0,1,0,0,0,
934     0,0,0,0,0,0,0,0,
935     0,0,0,0,0,0,255,3,
936     126,0,0,0,126,0,0,0,
937     0,0,0,0,0,0,0,0,
938     0,0,0,0,0,0,0,0,
939     0,0,0,0,0,0,255,3,
940     0,0,0,0,0,0,0,0,
941     0,0,0,0,0,0,12,2,
942     0,0,0,0,0,0,0,0,
943     0,0,0,0,0,0,0,0,
944     254,255,255,7,0,0,0,0,
945     0,0,0,0,0,0,0,0,
946     255,255,127,127,0,0,0,0,
947     0,0,0,0,0,0,0,0,
948     0,0,0,0,254,255,255,7,
949     0,0,0,0,0,4,32,4,
950     0,0,0,128,255,255,127,255,
951     0,0,0,0,0,0,255,3,
952     254,255,255,135,254,255,255,7,
953     0,0,0,0,0,4,44,6,
954     255,255,127,255,255,255,127,255,
955     0,0,0,0,254,255,255,255,
956     255,255,255,255,255,255,255,127,
957     0,0,0,0,254,255,255,255,
958     255,255,255,255,255,255,255,255,
959     0,2,0,0,255,255,255,255,
960     255,255,255,255,255,255,255,127,
961     0,0,0,0,255,255,255,255,
962     255,255,255,255,255,255,255,255,
963     0,0,0,0,254,255,0,252,
964     1,0,0,248,1,0,0,120,
965     0,0,0,0,254,255,255,255,
966     0,0,128,0,0,0,128,0,
967     255,255,255,255,0,0,0,0,
968     0,0,0,0,0,0,0,128,
969     255,255,255,255,0,0,0,0,
970     0,0,0,0,0,0,0,0,
/* Section 4 (entries 0-255): per-character type flags. The values match the
bit meanings listed for the preceding table (0x01 white space, 0x02 letter,
0x04 decimal digit, 0x08 hexadecimal digit, 0x10 alphanumeric or '_', 0x80
metacharacter or binary zero), written here in decimal: e.g. 28 = 0x1c for the
digits and 26 = 0x1a for the letters a-f. Unlike that table, some codes above
127 carry non-zero flags. */
971     128,0,0,0,0,0,0,0,
972     0,1,1,0,1,1,0,0,
973     0,0,0,0,0,0,0,0,
974     0,0,0,0,0,0,0,0,
975     1,0,0,0,128,0,0,0,
976     128,128,128,128,0,0,128,0,
977     28,28,28,28,28,28,28,28,
978     28,28,0,0,0,0,0,128,
979     0,26,26,26,26,26,26,18,
980     18,18,18,18,18,18,18,18,
981     18,18,18,18,18,18,18,18,
982     18,18,18,128,128,0,128,16,
983     0,26,26,26,26,26,26,18,
984     18,18,18,18,18,18,18,18,
985     18,18,18,18,18,18,18,18,
986     18,18,18,128,128,0,0,0,
987     0,0,0,0,0,1,0,0,
988     0,0,0,0,0,0,0,0,
989     0,0,0,0,0,0,0,0,
990     0,0,0,0,0,0,0,0,
991     1,0,0,0,0,0,0,0,
992     0,0,18,0,0,0,0,0,
993     0,0,20,20,0,18,0,0,
994     0,20,18,0,0,0,0,0,
995     18,18,18,18,18,18,18,18,
996     18,18,18,18,18,18,18,18,
997     18,18,18,18,18,18,18,0,
998     18,18,18,18,18,18,18,18,
999     18,18,18,18,18,18,18,18,
1000     18,18,18,18,18,18,18,18,
1001     18,18,18,18,18,18,18,0,
1002     18,18,18,18,18,18,18,18
1003     };
1004    
1005    
1006    
1007 ph10 558
1008     #ifndef HAVE_STRERROR
1009 nigel 49 /*************************************************
1010 ph10 558 * Provide strerror() for non-ANSI libraries *
1011     *************************************************/
1012    
1013     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1014     in their libraries, but can provide the same facility by this simple
1015     alternative function. */
1016    
extern int sys_nerr;
extern char *sys_errlist[];

/* Stand-in for strerror() on libraries that lack it: look the number up in
the system error-message table, and hand back a fixed message for any number
outside 0 .. sys_nerr-1. */

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr)
  return sys_errlist[n];

return "unknown error number";
}
1026     #endif /* HAVE_STRERROR */
1027    
1028    
1029 ph10 667 /*************************************************
1030     * JIT memory callback *
1031     *************************************************/
1032 ph10 558
1033 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
1034     {
1035     return (pcre_jit_stack *)arg;
1036     }
1037 ph10 558
1038 ph10 667
1039 ph10 836 #if !defined NOUTF || defined SUPPORT_PCRE16
1040 ph10 558 /*************************************************
1041 ph10 836 * Convert UTF-8 string to value *
1042     *************************************************/
1043    
1044     /* This function takes one or more bytes that represents a UTF-8 character,
1045     and returns the value of the character.
1046    
1047     Argument:
1048     utf8bytes a pointer to the byte vector
1049     vptr a pointer to an int to receive the value
1050    
1051     Returns: > 0 => the number of bytes consumed
1052     -6 to 0 => malformed UTF-8 character at offset = (-return)
1053     */
1054    
1055     static int
1056     utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1057     {
1058     int c = *utf8bytes++;
1059     int d = c;
1060     int i, j, s;
1061    
1062     for (i = -1; i < 6; i++) /* i is number of additional bytes */
1063     {
1064     if ((d & 0x80) == 0) break;
1065     d <<= 1;
1066     }
1067    
1068     if (i == -1) { *vptr = c; return 1; } /* ascii character */
1069     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
1070    
1071     /* i now has a value in the range 1-5 */
1072    
1073     s = 6*i;
1074     d = (c & utf8_table3[i]) << s;
1075    
1076     for (j = 0; j < i; j++)
1077     {
1078     c = *utf8bytes++;
1079     if ((c & 0xc0) != 0x80) return -(j+1);
1080     s -= 6;
1081     d |= (c & 0x3f) << s;
1082     }
1083    
1084     /* Check that encoding was the correct unique one */
1085    
1086     for (j = 0; j < utf8_table1_size; j++)
1087     if (d <= utf8_table1[j]) break;
1088     if (j != i) return -(i+1);
1089    
1090     /* Valid value */
1091    
1092     *vptr = d;
1093     return i+1;
1094     }
1095     #endif /* NOUTF || SUPPORT_PCRE16 */
1096    
1097    
1098    
1099     #if !defined NOUTF || defined SUPPORT_PCRE16
1100     /*************************************************
1101     * Convert character value to UTF-8 *
1102     *************************************************/
1103    
1104     /* This function takes an integer value in the range 0 - 0x7fffffff
1105     and encodes it as a UTF-8 character in 1 to 6 bytes.
1106    
1107     Arguments:
1108     cvalue the character value
1109     utf8bytes pointer to buffer for result - at least 6 bytes long
1110    
1111     Returns: number of bytes placed in the buffer
1112     */
1113    
1114     static int
1115     ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1116     {
1117     register int i, j;
1118     for (i = 0; i < utf8_table1_size; i++)
1119     if (cvalue <= utf8_table1[i]) break;
1120     utf8bytes += i;
1121     for (j = i; j > 0; j--)
1122     {
1123     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1124     cvalue >>= 6;
1125     }
1126     *utf8bytes = utf8_table2[i] | cvalue;
1127     return i + 1;
1128     }
1129 ph10 842 #endif
1130 ph10 836
1131    
1132     #ifdef SUPPORT_PCRE16
1133     /*************************************************
1134     * Convert a string to 16-bit *
1135     *************************************************/
1136    
1137     /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1138     8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1139     double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1140     in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1141     result is always left in buffer16.
1142    
1143     Note that this function does not object to surrogate values. This is
1144     deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1145     for the purpose of testing that they are correctly faulted.
1146    
1147 ph10 842 Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1148 ph10 836 in UTF-8 so that values greater than 255 can be handled.
1149    
1150     Arguments:
1151     data TRUE if converting a data line; FALSE for a regex
1152     p points to a byte string
1153     utf true if UTF-8 (to be converted to UTF-16)
1154     len number of bytes in the string (excluding trailing zero)
1155    
1156     Returns: number of 16-bit data items used (excluding trailing zero)
1157     OR -1 if a UTF-8 string is malformed
1158     OR -2 if a value > 0x10ffff is encountered
1159 ph10 842 OR -3 if a value > 0xffff is encountered when not in UTF mode
1160 ph10 836 */
1161    
1162     static int
1163     to16(int data, pcre_uint8 *p, int utf, int len)
1164     {
1165     pcre_uint16 *pp;
1166    
1167     if (buffer16_size < 2*len + 2)
1168     {
1169     if (buffer16 != NULL) free(buffer16);
1170     buffer16_size = 2*len + 2;
1171     buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1172     if (buffer16 == NULL)
1173     {
1174     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1175     exit(1);
1176     }
1177     }
1178    
1179     pp = buffer16;
1180    
1181     if (!utf && !data)
1182     {
1183     while (len-- > 0) *pp++ = *p++;
1184     }
1185    
1186     else
1187     {
1188     int c = 0;
1189     while (len > 0)
1190     {
1191     int chlen = utf82ord(p, &c);
1192     if (chlen <= 0) return -1;
1193     if (c > 0x10ffff) return -2;
1194     p += chlen;
1195     len -= chlen;
1196     if (c < 0x10000) *pp++ = c; else
1197     {
1198     if (!utf) return -3;
1199     c -= 0x10000;
1200     *pp++ = 0xD800 | (c >> 10);
1201     *pp++ = 0xDC00 | (c & 0x3ff);
1202     }
1203     }
1204     }
1205    
1206     *pp = 0;
1207     return pp - buffer16;
1208     }
1209     #endif
1210    
1211    
1212     /*************************************************
1213 nigel 91 * Read or extend an input line *
1214     *************************************************/
1215    
1216     /* Input lines are read into buffer, but both patterns and data lines can be
1217     continued over multiple input lines. In addition, if the buffer fills up, we
1218     want to automatically expand it so as to be able to handle extremely large
1219     lines that are needed for certain stress tests. When the input buffer is
1220     expanded, the other two buffers must also be expanded likewise, and the
1221     contents of pbuffer, which are a copy of the input for callouts, must be
1222     preserved (for when expansion happens for a data line). This is not the most
1223     optimal way of handling this, but hey, this is just a test program!
1224    
1225     Arguments:
1226     f the file to read
1227     start where in buffer to start (this *must* be within buffer)
1228 ph10 287 prompt for stdin or readline()
1229 nigel 91
1230     Returns: pointer to the start of new data
1231     could be a copy of start, or could be moved
1232     NULL if no data read and EOF reached
1233     */
1234    
1235 ph10 836 static pcre_uint8 *
1236     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1237 nigel 91 {
1238 ph10 836 pcre_uint8 *here = start;
1239 nigel 91
1240     for (;;)
1241     {
1242 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
1243 nigel 93
1244 nigel 91 if (rlen > 1000)
1245     {
1246     int dlen;
1247 ph10 289
1248 ph10 287 /* If libreadline support is required, use readline() to read a line if the
1249     input is a terminal. Note that readline() removes the trailing newline, so
1250     we must put it back again, to be compatible with fgets(). */
1251 ph10 289
1252 ph10 287 #ifdef SUPPORT_LIBREADLINE
1253     if (isatty(fileno(f)))
1254     {
1255 ph10 289 size_t len;
1256 ph10 287 char *s = readline(prompt);
1257     if (s == NULL) return (here == start)? NULL : start;
1258     len = strlen(s);
1259 ph10 289 if (len > 0) add_history(s);
1260 ph10 287 if (len > rlen - 1) len = rlen - 1;
1261     memcpy(here, s, len);
1262     here[len] = '\n';
1263 ph10 289 here[len+1] = 0;
1264     free(s);
1265 ph10 287 }
1266 ph10 289 else
1267     #endif
1268    
1269 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
1270 ph10 289
1271 ph10 287 {
1272 ph10 516 if (f == stdin) printf("%s", prompt);
1273 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
1274     return (here == start)? NULL : start;
1275 ph10 289 }
1276    
1277 nigel 91 dlen = (int)strlen((char *)here);
1278     if (dlen > 0 && here[dlen - 1] == '\n') return start;
1279     here += dlen;
1280     }
1281    
1282     else
1283     {
1284     int new_buffer_size = 2*buffer_size;
1285 ph10 836 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1286     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1287     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1288 nigel 91
1289     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1290     {
1291     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1292     exit(1);
1293     }
1294    
1295     memcpy(new_buffer, buffer, buffer_size);
1296     memcpy(new_pbuffer, pbuffer, buffer_size);
1297    
1298     buffer_size = new_buffer_size;
1299    
1300     start = new_buffer + (start - buffer);
1301     here = new_buffer + (here - buffer);
1302    
1303     free(buffer);
1304     free(dbuffer);
1305     free(pbuffer);
1306    
1307     buffer = new_buffer;
1308     dbuffer = new_dbuffer;
1309     pbuffer = new_pbuffer;
1310     }
1311     }
1312    
1313     return NULL; /* Control never gets here */
1314     }
1315    
1316    
1317    
1318     /*************************************************
1319 nigel 63 * Read number from string *
1320     *************************************************/
1321    
1322     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1323     around with conditional compilation, just do the job by hand. It is only used
1324 nigel 93 for unpicking arguments, so just keep it simple.
1325 nigel 63
1326     Arguments:
1327     str string to be converted
1328     endptr where to put the end pointer
1329    
1330     Returns: the value, as an int
1331     */
1332    
1333     static int
1334 ph10 836 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1335 nigel 63 {
1336     int result = 0;
1337     while(*str != 0 && isspace(*str)) str++;
1338     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1339     *endptr = str;
1340     return(result);
1341     }
1342    
1343    
1344    
1345 nigel 49 /*************************************************
1346 ph10 836 * Print one character *
1347 nigel 49 *************************************************/
1348    
1349 ph10 836 /* Print a single character either literally, or as a hex escape. */
1350 nigel 49
1351 ph10 836 static int pchar(int c, FILE *f)
1352 nigel 49 {
1353 ph10 836 if (PRINTOK(c))
1354     {
1355     if (f != NULL) fprintf(f, "%c", c);
1356     return 1;
1357     }
1358 nigel 49
1359 ph10 836 if (c < 0x100)
1360 nigel 49 {
1361 ph10 836 if (use_utf)
1362     {
1363     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1364     return 6;
1365     }
1366     else
1367     {
1368     if (f != NULL) fprintf(f, "\\x%02x", c);
1369     return 4;
1370     }
1371 nigel 49 }
1372    
1373 ph10 836 if (f != NULL) fprintf(f, "\\x{%02x}", c);
1374     return (c <= 0x000000ff)? 6 :
1375     (c <= 0x00000fff)? 7 :
1376     (c <= 0x0000ffff)? 8 :
1377     (c <= 0x000fffff)? 9 : 10;
1378     }
1379 nigel 49
1380    
1381    
1382 ph10 836 #ifdef SUPPORT_PCRE8
1383     /*************************************************
1384     * Print 8-bit character string *
1385     *************************************************/
1386 nigel 49
1387 ph10 836 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1388     If handed a NULL file, just counts chars without printing. */
1389 nigel 49
1390 ph10 836 static int pchars(pcre_uint8 *p, int length, FILE *f)
1391     {
1392     int c = 0;
1393     int yield = 0;
1394 nigel 49
1395 ph10 836 if (length < 0)
1396     length = strlen((char *)p);
1397 nigel 49
1398 ph10 836 while (length-- > 0)
1399     {
1400     #if !defined NOUTF
1401     if (use_utf)
1402     {
1403     int rc = utf82ord(p, &c);
1404     if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1405     {
1406     length -= rc - 1;
1407     p += rc;
1408     yield += pchar(c, f);
1409     continue;
1410     }
1411     }
1412     #endif
1413     c = *p++;
1414     yield += pchar(c, f);
1415     }
1416    
1417     return yield;
1418 nigel 49 }
1419 nigel 79 #endif
1420 nigel 49
1421    
1422 nigel 79
1423 ph10 836 #ifdef SUPPORT_PCRE16
1424 nigel 63 /*************************************************
1425 ph10 836 * Find length of 0-terminated 16-bit string *
1426 nigel 85 *************************************************/
1427    
1428 ph10 836 static int strlen16(PCRE_SPTR16 p)
1429 nigel 85 {
1430 ph10 836 int len = 0;
1431     while (*p++ != 0) len++;
1432     return len;
1433 nigel 85 }
1434 ph10 836 #endif /* SUPPORT_PCRE16 */
1435 nigel 85
1436    
1437 ph10 836 #ifdef SUPPORT_PCRE16
1438 nigel 85 /*************************************************
1439 ph10 836 * Print 16-bit character string *
1440 nigel 63 *************************************************/
1441 nigel 49
1442 ph10 836 /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1443     If handed a NULL file, just counts chars without printing. */
1444 nigel 49
1445 ph10 836 static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1446 nigel 3 {
1447 nigel 63 int yield = 0;
1448 nigel 3
1449 ph10 836 if (length < 0)
1450     length = strlen16(p);
1451    
1452 nigel 63 while (length-- > 0)
1453 nigel 3 {
1454 ph10 836 int c = *p++ & 0xffff;
1455     #if !defined NOUTF
1456     if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1457 nigel 63 {
1458 ph10 836 int d = *p & 0xffff;
1459     if (d >= 0xDC00 && d < 0xDFFF)
1460 nigel 63 {
1461 ph10 836 c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1462     length--;
1463     p++;
1464 nigel 63 }
1465     }
1466 nigel 79 #endif
1467 ph10 836 yield += pchar(c, f);
1468     }
1469 nigel 3
1470 ph10 836 return yield;
1471     }
1472     #endif /* SUPPORT_PCRE16 */
1473 nigel 63
1474 ph10 836
1475    
1476     #ifdef SUPPORT_PCRE8
1477     /*************************************************
1478     * Read a capture name (8-bit) and check it *
1479     *************************************************/
1480    
1481     static pcre_uint8 *
1482     read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1483     {
1484     pcre_uint8 *npp = *pp;
1485     while (isalnum(*p)) *npp++ = *p++;
1486     *npp++ = 0;
1487     *npp = 0;
1488     if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1489     {
1490     fprintf(outfile, "no parentheses with name \"");
1491     PCHARSV(*pp, 0, -1, outfile);
1492     fprintf(outfile, "\"\n");
1493 nigel 63 }
1494 nigel 3
1495 ph10 836 *pp = npp;
1496     return p;
1497 nigel 63 }
1498 ph10 836 #endif /* SUPPORT_PCRE8 */
1499 nigel 23
1500 nigel 3
1501 nigel 23
1502 ph10 836 #ifdef SUPPORT_PCRE16
1503 nigel 63 /*************************************************
1504 ph10 836 * Read a capture name (16-bit) and check it *
1505     *************************************************/
1506    
1507     /* Note that the text being read is 8-bit. */
1508    
1509     static pcre_uint8 *
1510     read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1511     {
1512     pcre_uint16 *npp = *pp;
1513     while (isalnum(*p)) *npp++ = *p++;
1514     *npp++ = 0;
1515     *npp = 0;
1516     if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
1517     {
1518     fprintf(outfile, "no parentheses with name \"");
1519     PCHARSV(*pp, 0, -1, outfile);
1520     fprintf(outfile, "\"\n");
1521     }
1522     *pp = npp;
1523     return p;
1524     }
1525     #endif /* SUPPORT_PCRE16 */
1526    
1527    
1528    
1529     /*************************************************
1530 nigel 63 * Callout function *
1531     *************************************************/
1532 nigel 3
1533 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1534     the match. Yield zero unless more callouts than the fail count, or the callout
1535     data is not zero. */
1536 nigel 3
1537 nigel 63 static int callout(pcre_callout_block *cb)
1538     {
1539     FILE *f = (first_callout | callout_extra)? outfile : NULL;
1540 nigel 75 int i, pre_start, post_start, subject_length;
1541 nigel 3
1542 nigel 63 if (callout_extra)
1543     {
1544     fprintf(f, "Callout %d: last capture = %d\n",
1545     cb->callout_number, cb->capture_last);
1546 nigel 3
1547 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
1548     {
1549     if (cb->offset_vector[i] < 0)
1550     fprintf(f, "%2d: <unset>\n", i/2);
1551     else
1552     {
1553     fprintf(f, "%2d: ", i/2);
1554 ph10 836 PCHARSV(cb->subject, cb->offset_vector[i],
1555 nigel 63 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1556     fprintf(f, "\n");
1557     }
1558     }
1559     }
1560 nigel 3
1561 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
1562     datails. On subsequent calls in the same match, we use pchars just to find the
1563     printed lengths of the substrings. */
1564 nigel 3
1565 nigel 63 if (f != NULL) fprintf(f, "--->");
1566 nigel 3
1567 ph10 836 PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1568     PCHARS(post_start, cb->subject, cb->start_match,
1569 nigel 63 cb->current_position - cb->start_match, f);
1570 nigel 3
1571 ph10 836 PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1572 nigel 75
1573 ph10 836 PCHARSV(cb->subject, cb->current_position,
1574 nigel 63 cb->subject_length - cb->current_position, f);
1575 nigel 3
1576 nigel 63 if (f != NULL) fprintf(f, "\n");
1577 nigel 9
1578 nigel 63 /* Always print appropriate indicators, with callout number if not already
1579 nigel 75 shown. For automatic callouts, show the pattern offset. */
1580 nigel 3
1581 nigel 75 if (cb->callout_number == 255)
1582     {
1583     fprintf(outfile, "%+3d ", cb->pattern_position);
1584     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1585     }
1586     else
1587     {
1588     if (callout_extra) fprintf(outfile, " ");
1589     else fprintf(outfile, "%3d ", cb->callout_number);
1590     }
1591 nigel 3
1592 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1593     fprintf(outfile, "^");
1594 nigel 3
1595 nigel 63 if (post_start > 0)
1596     {
1597     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1598     fprintf(outfile, "^");
1599 nigel 3 }
1600    
1601 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1602     fprintf(outfile, " ");
1603    
1604     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1605     pbuffer + cb->pattern_position);
1606    
1607 nigel 63 fprintf(outfile, "\n");
1608     first_callout = 0;
1609 nigel 3
1610 ph10 654 if (cb->mark != last_callout_mark)
1611 ph10 645 {
1612 ph10 836 if (cb->mark == NULL)
1613     fprintf(outfile, "Latest Mark: <unset>\n");
1614     else
1615     {
1616     fprintf(outfile, "Latest Mark: ");
1617     PCHARSV(cb->mark, 0, -1, outfile);
1618     putc('\n', outfile);
1619     }
1620 ph10 654 last_callout_mark = cb->mark;
1621     }
1622 ph10 645
1623 nigel 71 if (cb->callout_data != NULL)
1624 nigel 49 {
1625 nigel 71 int callout_data = *((int *)(cb->callout_data));
1626     if (callout_data != 0)
1627     {
1628     fprintf(outfile, "Callout data = %d\n", callout_data);
1629     return callout_data;
1630     }
1631 nigel 63 }
1632 nigel 49
1633 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
1634     (++callout_count >= callout_fail_count)? 1 : 0;
1635 nigel 3 }
1636    
1637    
1638 nigel 63 /*************************************************
1639 nigel 73 * Local malloc functions *
1640 nigel 63 *************************************************/
1641 nigel 3
1642 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1643 ph10 836 compiled re, which is the first store request that pcre_compile() makes. The
1644     show_malloc variable is set only during matching. */
1645 nigel 3
1646     static void *new_malloc(size_t size)
1647     {
1648 nigel 73 void *block = malloc(size);
1649 nigel 43 gotten_store = size;
1650 ph10 836 if (first_gotten_store == 0) first_gotten_store = size;
1651 nigel 73 if (show_malloc)
1652 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1653 nigel 73 return block;
1654 nigel 3 }
1655    
1656 nigel 73 static void new_free(void *block)
1657     {
1658     if (show_malloc)
1659     fprintf(outfile, "free %p\n", block);
1660     free(block);
1661     }
1662 nigel 3
1663 nigel 73 /* For recursion malloc/free, to test stacking calls */
1664    
1665     static void *stack_malloc(size_t size)
1666     {
1667     void *block = malloc(size);
1668     if (show_malloc)
1669 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1670 nigel 73 return block;
1671     }
1672    
1673     static void stack_free(void *block)
1674     {
1675     if (show_malloc)
1676     fprintf(outfile, "stack_free %p\n", block);
1677     free(block);
1678     }
1679    
1680    
1681 nigel 63 /*************************************************
1682     * Call pcre_fullinfo() *
1683     *************************************************/
1684 nigel 43
1685 ph10 836 /* Get one piece of information from the pcre_fullinfo() function. When only
1686     one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1687     value, but the code is defensive.
1688 nigel 43
1689 ph10 836 Arguments:
1690     re compiled regex
1691     study study data
1692     option PCRE_INFO_xxx option
1693     ptr where to put the data
1694    
1695     Returns: 0 when OK, < 0 on error
1696     */
1697    
1698     static int
1699     new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1700 nigel 43 {
1701     int rc;
1702 ph10 836
1703     if (use_pcre16)
1704     #ifdef SUPPORT_PCRE16
1705     rc = pcre16_fullinfo(re, study, option, ptr);
1706     #else
1707     rc = PCRE_ERROR_BADMODE;
1708     #endif
1709     else
1710     #ifdef SUPPORT_PCRE8
1711     rc = pcre_fullinfo(re, study, option, ptr);
1712     #else
1713     rc = PCRE_ERROR_BADMODE;
1714     #endif
1715    
1716     if (rc < 0)
1717     {
1718     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1719     use_pcre16? "16" : "", option);
1720     if (rc == PCRE_ERROR_BADMODE)
1721     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1722     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1723     }
1724    
1725     return rc;
1726 nigel 43 }
1727    
1728    
1729    
1730 nigel 63 /*************************************************
1731 ph10 836 * Swap byte functions *
1732 nigel 75 *************************************************/
1733    
1734 ph10 836 /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1735     value, respectively.
1736    
1737     Arguments:
1738     value any number
1739    
1740     Returns: the byte swapped value
1741     */
1742    
static pcre_uint32
swap_uint32(pcre_uint32 value)
{
/* Each byte is moved to its mirrored position; the four pieces occupy
disjoint bit ranges, so OR-ing them assembles the reversed value. */

pcre_uint32 from_byte0 = (value & 0x000000ff) << 24;
pcre_uint32 from_byte1 = (value & 0x0000ff00) << 8;
pcre_uint32 from_byte2 = (value & 0x00ff0000) >> 8;
pcre_uint32 from_byte3 = value >> 24;

return from_byte0 | from_byte1 | from_byte2 | from_byte3;
}
1751    
static pcre_uint16
swap_uint16(pcre_uint16 value)
{
/* The left shift is evaluated in a wider type; storing it back into a 16-bit
variable discards the bits that moved above bit 15. */

pcre_uint16 top_to_bottom = value >> 8;
pcre_uint16 bottom_to_top = value << 8;

return top_to_bottom | bottom_to_top;
}
1757 nigel 75
1758    
1759    
1760     /*************************************************
1761 ph10 836 * Flip bytes in a compiled pattern *
1762     *************************************************/
1763    
1764     /* This function is called if the 'F' option was present on a pattern that is
1765     to be written to a file. We flip the bytes of all the integer fields in the
1766     regex data block and the study block. In 16-bit mode this also flips relevant
1767     bytes in the pattern itself. This is to make it possible to test PCRE's
1768     ability to reload byte-flipped patterns, e.g. those compiled on a different
1769     architecture. */
1770    
1771     static void
1772     regexflip(pcre *ere, pcre_extra *extra)
1773     {
1774     real_pcre *re = (real_pcre *)ere;
1775     #ifdef SUPPORT_PCRE16
1776     int op;
1777     pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1778     int length = re->name_count * re->name_entry_size;
1779     #ifdef SUPPORT_UTF
1780     BOOL utf = (re->options & PCRE_UTF16) != 0;
1781     BOOL utf16_char = FALSE;
1782     #endif /* SUPPORT_UTF */
1783     #endif /* SUPPORT_PCRE16 */
1784    
1785     /* Always flip the bytes in the main data block and study blocks. */
1786    
1787     re->magic_number = REVERSED_MAGIC_NUMBER;
1788     re->size = swap_uint32(re->size);
1789     re->options = swap_uint32(re->options);
1790     re->flags = swap_uint16(re->flags);
1791     re->top_bracket = swap_uint16(re->top_bracket);
1792     re->top_backref = swap_uint16(re->top_backref);
1793     re->first_char = swap_uint16(re->first_char);
1794     re->req_char = swap_uint16(re->req_char);
1795     re->name_table_offset = swap_uint16(re->name_table_offset);
1796     re->name_entry_size = swap_uint16(re->name_entry_size);
1797     re->name_count = swap_uint16(re->name_count);
1798    
1799     if (extra != NULL)
1800     {
1801     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1802     rsd->size = swap_uint32(rsd->size);
1803     rsd->flags = swap_uint32(rsd->flags);
1804     rsd->minlength = swap_uint32(rsd->minlength);
1805     }
1806    
1807     /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1808     in the name table, if present, and then in the pattern itself. */
1809    
1810     #ifdef SUPPORT_PCRE16
1811     if (!use_pcre16) return;
1812    
1813     while(TRUE)
1814     {
1815     /* Swap previous characters. */
1816     while (length-- > 0)
1817     {
1818     *ptr = swap_uint16(*ptr);
1819     ptr++;
1820     }
1821     #ifdef SUPPORT_UTF
1822     if (utf16_char)
1823     {
1824     if ((ptr[-1] & 0xfc00) == 0xd800)
1825     {
1826     /* We know that there is only one extra character in UTF-16. */
1827     *ptr = swap_uint16(*ptr);
1828     ptr++;
1829     }
1830     }
1831     utf16_char = FALSE;
1832     #endif /* SUPPORT_UTF */
1833    
1834     /* Get next opcode. */
1835    
1836     length = 0;
1837     op = *ptr;
1838     *ptr++ = swap_uint16(op);
1839    
1840     switch (op)
1841     {
1842     case OP_END:
1843     return;
1844    
1845     #ifdef SUPPORT_UTF
1846     case OP_CHAR:
1847     case OP_CHARI:
1848     case OP_NOT:
1849     case OP_NOTI:
1850     case OP_STAR:
1851     case OP_MINSTAR:
1852     case OP_PLUS:
1853     case OP_MINPLUS:
1854     case OP_QUERY:
1855     case OP_MINQUERY:
1856     case OP_UPTO:
1857     case OP_MINUPTO:
1858     case OP_EXACT:
1859     case OP_POSSTAR:
1860     case OP_POSPLUS:
1861     case OP_POSQUERY:
1862     case OP_POSUPTO:
1863     case OP_STARI:
1864     case OP_MINSTARI:
1865     case OP_PLUSI:
1866     case OP_MINPLUSI:
1867     case OP_QUERYI:
1868     case OP_MINQUERYI:
1869     case OP_UPTOI:
1870     case OP_MINUPTOI:
1871     case OP_EXACTI:
1872     case OP_POSSTARI:
1873     case OP_POSPLUSI:
1874     case OP_POSQUERYI:
1875     case OP_POSUPTOI:
1876     case OP_NOTSTAR:
1877     case OP_NOTMINSTAR:
1878     case OP_NOTPLUS:
1879     case OP_NOTMINPLUS:
1880     case OP_NOTQUERY:
1881     case OP_NOTMINQUERY:
1882     case OP_NOTUPTO:
1883     case OP_NOTMINUPTO:
1884     case OP_NOTEXACT:
1885     case OP_NOTPOSSTAR:
1886     case OP_NOTPOSPLUS:
1887     case OP_NOTPOSQUERY:
1888     case OP_NOTPOSUPTO:
1889     case OP_NOTSTARI:
1890     case OP_NOTMINSTARI:
1891     case OP_NOTPLUSI:
1892     case OP_NOTMINPLUSI:
1893     case OP_NOTQUERYI:
1894     case OP_NOTMINQUERYI:
1895     case OP_NOTUPTOI:
1896     case OP_NOTMINUPTOI:
1897     case OP_NOTEXACTI:
1898     case OP_NOTPOSSTARI:
1899     case OP_NOTPOSPLUSI:
1900     case OP_NOTPOSQUERYI:
1901     case OP_NOTPOSUPTOI:
1902     if (utf) utf16_char = TRUE;
1903     #endif
1904     /* Fall through. */
1905    
1906     default:
1907     length = OP_lengths16[op] - 1;
1908     break;
1909    
1910     case OP_CLASS:
1911     case OP_NCLASS:
1912     /* Skip the character bit map. */
1913     ptr += 32/sizeof(pcre_uint16);
1914     length = 0;
1915     break;
1916    
1917     case OP_XCLASS:
1918 zherczeg 839 /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1919     if (LINK_SIZE > 1)
1920     length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1921     - (1 + LINK_SIZE + 1));
1922     else
1923     length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1924    
1925 ph10 836 /* Reverse the size of the XCLASS instance. */
1926 zherczeg 839 *ptr = swap_uint16(*ptr);
1927 ph10 836 ptr++;
1928     if (LINK_SIZE > 1)
1929     {
1930 zherczeg 839 *ptr = swap_uint16(*ptr);
1931 ph10 836 ptr++;
1932     }
1933    
1934     op = *ptr;
1935     *ptr = swap_uint16(op);
1936 zherczeg 839 ptr++;
1937 ph10 836 if ((op & XCL_MAP) != 0)
1938     {
1939     /* Skip the character bit map. */
1940     ptr += 32/sizeof(pcre_uint16);
1941     length -= 32/sizeof(pcre_uint16);
1942     }
1943     break;
1944     }
1945     }
1946     /* Control should never reach here in 16 bit mode. */
1947     #endif /* SUPPORT_PCRE16 */
1948     }
1949    
1950    
1951    
1952     /*************************************************
1953 nigel 87 * Check match or recursion limit *
1954     *************************************************/
1955    
1956     static int
1957 ph10 836 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1958 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1959     int flag, unsigned long int *limit, int errnumber, const char *msg)
1960     {
1961     int count;
1962     int min = 0;
1963     int mid = 64;
1964     int max = -1;
1965    
1966     extra->flags |= flag;
1967    
1968     for (;;)
1969     {
1970     *limit = mid;
1971    
1972 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1973 nigel 87 use_offsets, use_size_offsets);
1974    
1975     if (count == errnumber)
1976     {
1977     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1978     min = mid;
1979     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1980     }
1981    
1982     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1983     count == PCRE_ERROR_PARTIAL)
1984     {
1985     if (mid == min + 1)
1986     {
1987     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1988     break;
1989     }
1990     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1991     max = mid;
1992     mid = (min + mid)/2;
1993     }
1994     else break; /* Some other error */
1995     }
1996    
1997     extra->flags &= ~flag;
1998     return count;
1999     }
2000    
2001    
2002    
2003     /*************************************************
2004 ph10 227 * Case-independent strncmp() function *
2005     *************************************************/
2006    
/* Compare at most n characters of two zero-terminated strings, ignoring case.
As with strncmp(), the comparison also ends when both strings terminate, so
strings shorter than n are never read beyond their terminating zero. A count
that is zero or negative compares equal.

Arguments:
  s         first string
  t         second string
  n         number of characters to compare

Returns:    < 0, = 0, or > 0, according to the comparison
*/

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
while (n-- > 0)
  {
  int c = tolower(*s) - tolower(*t);
  if (c != 0) return c;

  /* The characters are equal, so a zero here ends both strings at once. */
  if (*s == 0) break;

  s++;
  t++;
  }
return 0;
}
2026    
2027    
2028    
2029     /*************************************************
2030 nigel 91 * Check newline indicator *
2031     *************************************************/
2032    
2033 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
2034     a message and return 0 if there is no match.
2035 nigel 91
2036     Arguments:
2037     p points after the leading '<'
2038     f file for error message
2039    
2040     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
2041     */
2042    
2043     static int
2044 ph10 836 check_newline(pcre_uint8 *p, FILE *f)
2045 nigel 91 {
2046 ph10 836 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2047     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2048     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2049     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2050     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2051     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2052     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2053 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
2054     return 0;
2055     }
2056    
2057    
2058    
2059     /*************************************************
2060 nigel 93 * Usage function *
2061     *************************************************/
2062    
/* Write the command-line summary to stdout. Lines for optional features
(readline, the 16-bit interface, DFA matching, the POSIX interface) depend on
how the program was built. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif
fputs(" -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -C arg show a specific compile-time option\n"
      " and exit with its value. The arg can be:\n"
      " linksize internal link size [2, 3, 4]\n"
      " pcre8 8 bit library support enabled [0, 1]\n"
      " pcre16 16 bit library support enabled [0, 1]\n"
      " utf Unicode Transformation Format supported [0, 1]\n"
      " ucp Unicode Properties supported [0, 1]\n"
      " jit Just-in-time compiler supported [0, 1]\n"
      " newline Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
2109    
2110    
2111    
2112     /*************************************************
2113 nigel 63 * Main Program *
2114     *************************************************/
2115 nigel 43
2116 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
2117     consist of a regular expression, in delimiters and optionally followed by
2118     options, followed by a set of test data, terminated by an empty line. */
2119    
2120     int main(int argc, char **argv)
2121     {
2122     FILE *infile = stdin;
2123 ph10 836 const char *version;
2124 nigel 3 int options = 0;
2125     int study_options = 0;
2126 ph10 386 int default_find_match_limit = FALSE;
2127 nigel 3 int op = 1;
2128     int timeit = 0;
2129 nigel 93 int timeitm = 0;
2130 nigel 3 int showinfo = 0;
2131 nigel 31 int showstore = 0;
2132 ph10 667 int force_study = -1;
2133     int force_study_options = 0;
2134 nigel 87 int quiet = 0;
2135 nigel 53 int size_offsets = 45;
2136     int size_offsets_max;
2137 nigel 77 int *offsets = NULL;
2138 nigel 53 #if !defined NOPOSIX
2139 nigel 3 int posix = 0;
2140 nigel 53 #endif
2141 nigel 3 int debug = 0;
2142 nigel 11 int done = 0;
2143 nigel 77 int all_use_dfa = 0;
2144     int yield = 0;
2145 nigel 91 int stack_size;
2146 nigel 3
2147 ph10 667 pcre_jit_stack *jit_stack = NULL;
2148    
2149 ph10 836 /* These vectors store, end-to-end, a list of zero-terminated captured
2150     substring names, each list itself being terminated by an empty name. Assume
2151     that 1024 is plenty long enough for the few names we'll be testing. It is
2152     easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2153     for the actual memory, to ensure alignment. By defining these variables always
2154     (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2155     #ifdefs in the code. */
2156 ph10 667
2157 ph10 836 pcre_uint16 copynames[1024];
2158     pcre_uint16 getnames[1024];
2159 nigel 69
2160 ph10 836 pcre_uint16 *cn16ptr;
2161     pcre_uint16 *gn16ptr;
2162 nigel 91
2163 ph10 836 pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2164     pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2165     pcre_uint8 *cn8ptr;
2166     pcre_uint8 *gn8ptr;
2167 nigel 91
2168 ph10 836 /* Get buffers from malloc() so that valgrind will check their misuse when
2169     debugging. They grow automatically when very long lines are read. The 16-bit
2170     buffer (buffer16) is obtained only if needed. */
2171 nigel 69
2172 ph10 836 buffer = (pcre_uint8 *)malloc(buffer_size);
2173     dbuffer = (pcre_uint8 *)malloc(buffer_size);
2174     pbuffer = (pcre_uint8 *)malloc(buffer_size);
2175 nigel 69
2176 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
2177 nigel 3
2178 nigel 93 outfile = stdout;
2179    
2180     /* The following _setmode() stuff is some Windows magic that tells its runtime
2181     library to translate CRLF into a single LF character. At least, that's what
2182     I've been told: never having used Windows I take this all on trust. Originally
2183     it set 0x8000, but then I was advised that _O_BINARY was better. */
2184    
2185 nigel 75 #if defined(_WIN32) || defined(WIN32)
2186 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
2187     #endif
2188 nigel 75
2189 ph10 836 /* Get the version number: both pcre_version() and pcre16_version() give the
2190     same answer. We just need to ensure that we call one that is available. */
2191    
2192     #ifdef SUPPORT_PCRE8
2193     version = pcre_version();
2194     #else
2195     version = pcre16_version();
2196     #endif
2197    
2198 nigel 3 /* Scan options */
2199    
2200     while (argc > 1 && argv[op][0] == '-')
2201     {
2202 ph10 836 pcre_uint8 *endptr;
2203 nigel 53
2204 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
2205 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2206 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
2207 ph10 667 {
2208     force_study = 1;
2209     force_study_options = PCRE_STUDY_JIT_COMPILE;
2210 ph10 691 }
2211 ph10 836 else if (strcmp(argv[op], "-16") == 0)
2212     {
2213     #ifdef SUPPORT_PCRE16
2214     use_pcre16 = 1;
2215     #else
2216     printf("** This version of PCRE was built without 16-bit support\n");
2217     exit(1);
2218     #endif
2219     }
2220 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2221 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
2222 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
2223     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
2224 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
2225 nigel 79 #if !defined NODFA
2226 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2227 nigel 79 #endif
2228 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2229 ph10 836 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2230 nigel 65 *endptr == 0))
2231 nigel 53 {
2232     op++;
2233     argc--;
2234     }
2235 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
2236     {
2237     int both = argv[op][2] == 0;
2238     int temp;
2239 ph10 836 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2240 nigel 93 *endptr == 0))
2241     {
2242     timeitm = temp;
2243     op++;
2244     argc--;
2245     }
2246     else timeitm = LOOPREPEAT;
2247     if (both) timeit = timeitm;
2248     }
2249 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2250 ph10 836 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2251 nigel 91 *endptr == 0))
2252     {
2253 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2254 nigel 91 printf("PCRE: -S not supported on this OS\n");
2255     exit(1);
2256     #else
2257     int rc;
2258     struct rlimit rlim;
2259     getrlimit(RLIMIT_STACK, &rlim);
2260     rlim.rlim_cur = stack_size * 1024 * 1024;
2261     rc = setrlimit(RLIMIT_STACK, &rlim);
2262     if (rc != 0)
2263     {
2264     printf("PCRE: setrlimit() failed with error %d\n", rc);
2265     exit(1);
2266     }
2267     op++;
2268     argc--;
2269     #endif
2270     }
2271 nigel 53 #if !defined NOPOSIX
2272 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
2273 nigel 53 #endif
2274 nigel 63 else if (strcmp(argv[op], "-C") == 0)
2275     {
2276     int rc;
2277 ph10 392 unsigned long int lrc;
2278 ph10 836
2279     if (argc > 2)
2280     {
2281     if (strcmp(argv[op + 1], "linksize") == 0)
2282     {
2283     (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2284     printf("%d\n", rc);
2285     yield = rc;
2286     goto EXIT;
2287     }
2288     if (strcmp(argv[op + 1], "pcre8") == 0)
2289     {
2290     #ifdef SUPPORT_PCRE8
2291     printf("1\n");
2292     yield = 1;
2293     #else
2294     printf("0\n");
2295     yield = 0;
2296     #endif
2297     goto EXIT;
2298     }
2299     if (strcmp(argv[op + 1], "pcre16") == 0)
2300     {
2301     #ifdef SUPPORT_PCRE16
2302     printf("1\n");
2303     yield = 1;
2304     #else
2305     printf("0\n");
2306     yield = 0;
2307     #endif
2308     goto EXIT;
2309     }
2310     if (strcmp(argv[op + 1], "utf") == 0)
2311     {
2312     #ifdef SUPPORT_PCRE8
2313     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2314     printf("%d\n", rc);
2315     yield = rc;
2316     #else
2317     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2318     printf("%d\n", rc);
2319     yield = rc;
2320     #endif
2321     goto EXIT;
2322     }
2323     if (strcmp(argv[op + 1], "ucp") == 0)
2324     {
2325     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2326     printf("%d\n", rc);
2327     yield = rc;
2328     goto EXIT;
2329     }
2330     if (strcmp(argv[op + 1], "jit") == 0)
2331     {
2332     (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2333     printf("%d\n", rc);
2334     yield = rc;
2335     goto EXIT;
2336     }
2337 ph10 838 if (strcmp(argv[op + 1], "newline") == 0)
2338 ph10 842 {
2339 ph10 838 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2340     /* Note that these values are always the ASCII values, even
2341     in EBCDIC environments. CR is 13 and NL is 10. */
2342     printf("%s\n", (rc == 13)? "CR" :
2343     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2344     (rc == -2)? "ANYCRLF" :
2345     (rc == -1)? "ANY" : "???");
2346     goto EXIT;
2347 ph10 842 }
2348 ph10 838 printf("Unknown -C option: %s\n", argv[op + 1]);
2349 ph10 836 goto EXIT;
2350     }
2351    
2352     printf("PCRE version %s\n", version);
2353 nigel 63 printf("Compiled with\n");
2354 ph10 836
2355     /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2356     are set, either both UTFs are supported or both are not supported. */
2357    
2358     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2359     printf(" 8-bit and 16-bit support\n");
2360 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2361 ph10 836 if (rc)
2362     printf(" UTF-8 and UTF-16 support\n");
2363     else
2364     printf(" No UTF-8 or UTF-16 support\n");
2365     #elif defined SUPPORT_PCRE8
2366     printf(" 8-bit support only\n");
2367     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2368 nigel 63 printf(" %sUTF-8 support\n", rc? "" : "No ");
2369 ph10 836 #else
2370     printf(" 16-bit support only\n");
2371     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2372     printf(" %sUTF-16 support\n", rc? "" : "No ");
2373     #endif
2374    
2375     (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2376 nigel 75 printf(" %sUnicode properties support\n", rc? "" : "No ");
2377 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2378 ph10 674 if (rc)
2379 ph10 689 printf(" Just-in-time compiler support\n");
2380 ph10 674 else
2381     printf(" No just-in-time compiler support\n");
2382 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2383 ph10 391 /* Note that these values are always the ASCII values, even
2384 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
2385 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
2386     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2387 ph10 150 (rc == -2)? "ANYCRLF" :
2388 nigel 93 (rc == -1)? "ANY" : "???");
2389 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2390 ph10 231 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2391     "all Unicode newlines");
2392 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2393 nigel 63 printf(" Internal link size = %d\n", rc);
2394 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2395 nigel 63 printf(" POSIX malloc threshold = %d\n", rc);
2396 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2397 ph10 376 printf(" Default match limit = %ld\n", lrc);
2398 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2399 ph10 376 printf(" Default recursion depth limit = %ld\n", lrc);
2400 ph10 836 (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2401 nigel 73 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
2402 ph10 121 goto EXIT;
2403 nigel 63 }
2404 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
2405     strcmp(argv[op], "--help") == 0)
2406     {
2407     usage();
2408     goto EXIT;
2409     }
2410 nigel 3 else
2411     {
2412 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
2413 nigel 93 usage();
2414 nigel 77 yield = 1;
2415     goto EXIT;
2416 nigel 3 }
2417     op++;
2418     argc--;
2419     }
2420    
2421 nigel 53 /* Get the store for the offsets vector, and remember what it was */
2422    
2423     size_offsets_max = size_offsets;
2424 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
2425 nigel 53 if (offsets == NULL)
2426     {
2427     printf("** Failed to get %d bytes of memory for offsets vector\n",
2428 ph10 151 (int)(size_offsets_max * sizeof(int)));
2429 nigel 77 yield = 1;
2430     goto EXIT;
2431 nigel 53 }
2432    
2433 nigel 3 /* Sort out the input and output files */
2434    
2435     if (argc > 1)
2436     {
2437 nigel 93 infile = fopen(argv[op], INPUT_MODE);
2438 nigel 3 if (infile == NULL)
2439     {
2440     printf("** Failed to open %s\n", argv[op]);
2441 nigel 77 yield = 1;
2442     goto EXIT;
2443 nigel 3 }
2444     }
2445    
2446     if (argc > 2)
2447     {
2448 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
2449 nigel 3 if (outfile == NULL)
2450     {
2451     printf("** Failed to open %s\n", argv[op+1]);
2452 nigel 77 yield = 1;
2453     goto EXIT;
2454 nigel 3 }
2455     }
2456    
2457     /* Set alternative malloc function */
2458    
2459 ph10 836 #ifdef SUPPORT_PCRE8
2460 nigel 3 pcre_malloc = new_malloc;
2461 nigel 73 pcre_free = new_free;
2462     pcre_stack_malloc = stack_malloc;
2463     pcre_stack_free = stack_free;
2464 ph10 836 #endif
2465 nigel 3
2466 ph10 836 #ifdef SUPPORT_PCRE16
2467     pcre16_malloc = new_malloc;
2468     pcre16_free = new_free;
2469     pcre16_stack_malloc = stack_malloc;
2470     pcre16_stack_free = stack_free;
2471     #endif
2472    
2473 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
2474 nigel 3
2475 ph10 836 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2476 nigel 3
2477     /* Main loop */
2478    
2479 nigel 11 while (!done)
2480 nigel 3 {
2481     pcre *re = NULL;
2482     pcre_extra *extra = NULL;
2483 nigel 37
2484     #if !defined NOPOSIX /* There are still compilers that require no indent */
2485 nigel 3 regex_t preg;
2486 nigel 45 int do_posix = 0;
2487 nigel 37 #endif
2488    
2489 nigel 7 const char *error;
2490 ph10 836 pcre_uint8 *markptr;
2491     pcre_uint8 *p, *pp, *ppp;
2492     pcre_uint8 *to_file = NULL;
2493     const pcre_uint8 *tables = NULL;
2494 zherczeg 847 unsigned long int get_options;
2495 nigel 75 unsigned long int true_size, true_study_size = 0;
2496     size_t size, regex_gotten_store;
2497 ph10 654 int do_allcaps = 0;
2498 ph10 512 int do_mark = 0;
2499 nigel 3 int do_study = 0;
2500 ph10 654 int no_force_study = 0;
2501 nigel 25 int do_debug = debug;
2502 nigel 35 int do_G = 0;
2503     int do_g = 0;
2504 nigel 25 int do_showinfo = showinfo;
2505 nigel 35 int do_showrest = 0;
2506 ph10 616 int do_showcaprest = 0;
2507 nigel 75 int do_flip = 0;
2508 nigel 93 int erroroffset, len, delimiter, poffset;
2509 nigel 3
2510 ph10 836 use_utf = 0;
2511 ph10 211 debug_lengths = 1;
2512 nigel 63
2513 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
2514 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2515 nigel 63 fflush(outfile);
2516 nigel 3
2517     p = buffer;
2518     while (isspace(*p)) p++;
2519     if (*p == 0) continue;
2520    
2521 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
2522 nigel 3
2523 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2524     {
2525 zherczeg 841 pcre_uint32 magic;
2526 ph10 836 pcre_uint8 sbuf[8];
2527 nigel 75 FILE *f;
2528    
2529     p++;
2530 zherczeg 839 if (*p == '!')
2531     {
2532     do_debug = TRUE;
2533     do_showinfo = TRUE;
2534     p++;
2535     }
2536    
2537 nigel 75 pp = p + (int)strlen((char *)p);
2538     while (isspace(pp[-1])) pp--;
2539     *pp = 0;
2540    
2541     f = fopen((char *)p, "rb");
2542     if (f == NULL)
2543     {
2544     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2545     continue;
2546     }
2547    
2548 zherczeg 839 first_gotten_store = 0;
2549 nigel 75 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2550    
2551     true_size =
2552     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2553     true_study_size =
2554     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2555    
2556     re = (real_pcre *)new_malloc(true_size);
2557 ph10 836 regex_gotten_store = first_gotten_store;
2558 nigel 75
2559     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2560    
2561     magic = ((real_pcre *)re)->magic_number;
2562     if (magic != MAGIC_NUMBER)
2563     {
2564 ph10 836 if (swap_uint32(magic) == MAGIC_NUMBER)
2565 nigel 75 {
2566     do_flip = 1;
2567     }
2568     else
2569     {
2570     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2571     fclose(f);
2572     continue;
2573     }
2574     }
2575    
2576 zherczeg 839 /* We hide the byte-invert info for little and big endian tests. */
2577 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2578 zherczeg 839 do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2579 nigel 75
2580 ph10 612 /* Now see if there is any following study data. */
2581 nigel 75
2582     if (true_study_size != 0)
2583     {
2584     pcre_study_data *psd;
2585    
2586     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2587     extra->flags = PCRE_EXTRA_STUDY_DATA;
2588    
2589     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2590     extra->study_data = psd;
2591    
2592     if (fread(psd, 1, true_study_size, f) != true_study_size)
2593     {
2594     FAIL_READ:
2595     fprintf(outfile, "Failed to read data from %s\n", p);
2596 ph10 836 if (extra != NULL)
2597     {
2598     PCRE_FREE_STUDY(extra);
2599     }
2600 nigel 75 if (re != NULL) new_free(re);
2601     fclose(f);
2602     continue;
2603     }
2604     fprintf(outfile, "Study data loaded from %s\n", p);
2605     do_study = 1; /* To get the data output if requested */
2606     }
2607     else fprintf(outfile, "No study data\n");
2608    
2609 ph10 836 /* Flip the necessary bytes. */
2610     if (do_flip)
2611     {
2612 zherczeg 839 int rc;
2613     PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2614     if (rc == PCRE_ERROR_BADMODE)
2615     {
2616     /* Simulate the result of the function call below. */
2617     fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2618     use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2619     fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2620     "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2621     continue;
2622     }
2623 ph10 836 }
2624    
2625     /* Need to know if UTF-8 for printing data strings. */
2626    
2627     if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2628     use_utf = (get_options & PCRE_UTF8) != 0;
2629    
2630 nigel 75 fclose(f);
2631     goto SHOW_INFO;
2632     }
2633    
2634     /* In-line pattern (the usual case). Get the delimiter and seek the end of
2635 ph10 836 the pattern; if it isn't complete, read more. */
2636 nigel 75
2637 nigel 3 delimiter = *p++;
2638    
2639 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
2640 nigel 3 {
2641 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2642 nigel 3 goto SKIP_DATA;
2643     }
2644    
2645     pp = p;
2646 ph10 530 poffset = (int)(p - buffer);
2647 nigel 3
2648     for(;;)
2649     {
2650 nigel 29 while (*pp != 0)
2651     {
2652     if (*pp == '\\' && pp[1] != 0) pp++;
2653     else if (*pp == delimiter) break;
2654     pp++;
2655     }
2656 nigel 3 if (*pp != 0) break;
2657 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
2658 nigel 3 {
2659     fprintf(outfile, "** Unexpected EOF\n");
2660 nigel 11 done = 1;
2661     goto CONTINUE;
2662 nigel 3 }
2663 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2664 nigel 3 }
2665    
2666 nigel 93 /* The buffer may have moved while being extended; reset the start of data
2667     pointer to the correct relative point in the buffer. */
2668    
2669     p = buffer + poffset;
2670    
2671 nigel 29 /* If the first character after the delimiter is backslash, make
2672     the pattern end with backslash. This is purely to provide a way
2673     of testing for the error message when a pattern ends with backslash. */
2674    
2675     if (pp[1] == '\\') *pp++ = '\\';
2676    
2677 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
2678     for callouts. */
2679 nigel 3
2680     *pp++ = 0;
2681 nigel 75 strcpy((char *)pbuffer, (char *)p);
2682 nigel 3
2683     /* Look for options after final delimiter */
2684    
2685     options = 0;
2686 ph10 836 study_options = 0;
2687 nigel 31 log_store = showstore; /* default from command line */
2688    
2689 nigel 3 while (*pp != 0)
2690     {
2691     switch (*pp++)
2692     {
2693 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
2694 nigel 35 case 'g': do_g = 1; break;
2695 nigel 3 case 'i': options |= PCRE_CASELESS; break;
2696     case 'm': options |= PCRE_MULTILINE; break;
2697     case 's': options |= PCRE_DOTALL; break;
2698     case 'x': options |= PCRE_EXTENDED; break;
2699 nigel 25
2700 ph10 616 case '+':
2701 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2702 ph10 616 break;
2703 ph10 654
2704     case '=': do_allcaps = 1; break;
2705 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
2706 nigel 93 case 'B': do_debug = 1; break;
2707 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
2708 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
2709 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2710 nigel 75 case 'F': do_flip = 1; break;
2711 nigel 35 case 'G': do_G = 1; break;
2712 nigel 25 case 'I': do_showinfo = 1; break;
2713 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
2714 ph10 512 case 'K': do_mark = 1; break;
2715 nigel 31 case 'M': log_store = 1; break;
2716 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2717 nigel 37
2718     #if !defined NOPOSIX
2719 nigel 3 case 'P': do_posix = 1; break;
2720 nigel 37 #endif
2721    
2722 ph10 654 case 'S':
2723 ph10 691 if (do_study == 0)
2724 ph10 612 {
2725 ph10 691 do_study = 1;
2726 ph10 667 if (*pp == '+')
2727     {
2728     study_options |= PCRE_STUDY_JIT_COMPILE;
2729 ph10 691 pp++;
2730     }
2731     }
2732 ph10 667 else
2733     {
2734 ph10 612 do_study = 0;
2735     no_force_study = 1;
2736 ph10 654 }
2737 ph10 612 break;
2738    
2739 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
2740 ph10 535 case 'W': options |= PCRE_UCP; break;
2741 nigel 3 case 'X': options |= PCRE_EXTRA; break;
2742 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2743 ph10 126 case 'Z': debug_lengths = 0; break;
2744 ph10 836 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2745 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2746 ph10 545
2747 ph10 541 case 'T':
2748     switch (*pp++)
2749     {
2750     case '0': tables = tables0; break;
2751     case '1': tables = tables1; break;
2752 ph10 545
2753 ph10 541 case '\r':
2754     case '\n':
2755 ph10 545 case ' ':
2756     case 0:
2757 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
2758 ph10 545 goto SKIP_DATA;
2759    
2760     default:
2761 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2762 ph10 545 goto SKIP_DATA;
2763 ph10 541 }
2764 ph10 545 break;
2765 nigel 25
2766     case 'L':
2767     ppp = pp;
2768 nigel 93 /* The '\r' test here is so that it works on Windows. */
2769     /* The zero (NUL) test is just in case this is an unterminated line. */
2770     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2771 nigel 25 *ppp = 0;
2772     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2773     {
2774     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2775     goto SKIP_DATA;
2776     }
2777 nigel 93 locale_set = 1;
2778 ph10 836 tables = PCRE_MAKETABLES;
2779 nigel 25 pp = ppp;
2780     break;
2781    
2782 nigel 75 case '>':
2783     to_file = pp;
2784     while (*pp != 0) pp++;
2785     while (isspace(pp[-1])) pp--;
2786     *pp = 0;
2787     break;
2788    
2789 nigel 91 case '<':
2790     {
2791 ph10 836 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2792 ph10 336 {
2793     options |= PCRE_JAVASCRIPT_COMPAT;
2794 ph10 345 pp += 3;
2795 ph10 336 }
2796     else
2797 ph10 345 {
2798 ph10 336 int x = check_newline(pp, outfile);
2799     if (x == 0) goto SKIP_DATA;
2800     options |= x;
2801     while (*pp++ != '>');
2802 ph10 345 }
2803 nigel 91 }
2804     break;
2805    
2806 nigel 77 case '\r': /* So that it works in Windows */
2807     case '\n':
2808     case ' ':
2809     break;
2810 nigel 75
2811 nigel 3 default:
2812     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2813     goto SKIP_DATA;
2814     }
2815     }
2816    
2817 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
2818 nigel 25 timing, showing, or debugging options, nor the ability to pass over
2819 ph10 836 local character tables. Neither does it have 16-bit support. */
2820 nigel 3
2821 nigel 37 #if !defined NOPOSIX
2822 nigel 3 if (posix || do_posix)
2823     {
2824     int rc;
2825     int cflags = 0;
2826 nigel 75
2827 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2828     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2829 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2830 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2831     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2832 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2833 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2834 nigel 87
2835 ph10 836 first_gotten_store = 0;
2836 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
2837    
2838     /* Compilation failed; go back for another re, skipping to blank line
2839     if non-interactive. */
2840    
2841     if (rc != 0)
2842     {
2843 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2844 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2845     goto SKIP_DATA;
2846     }
2847     }
2848    
2849     /* Handle compiling via the native interface */
2850    
2851     else
2852 nigel 37 #endif /* !defined NOPOSIX */
2853    
2854 nigel 3 {
2855 ph10 836 /* In 16-bit mode, convert the input. */
2856    
2857     #ifdef SUPPORT_PCRE16
2858     if (use_pcre16)
2859     {
2860     switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2861     {
2862     case -1:
2863     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2864     "converted to UTF-16\n");
2865     goto SKIP_DATA;
2866    
2867     case -2:
2868     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2869     "cannot be converted to UTF-16\n");
2870     goto SKIP_DATA;
2871 ph10 842
2872 ph10 836 case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2873     fprintf(outfile, "**Failed: character value greater than 0xffff "
2874     "cannot be converted to 16-bit in non-UTF mode\n");
2875 ph10 842 goto SKIP_DATA;
2876 ph10 836
2877     default:
2878     break;
2879     }
2880     p = (pcre_uint8 *)buffer16;
2881     }
2882     #endif
2883    
2884     /* Compile many times when timing */
2885    
2886 nigel 93 if (timeit > 0)
2887 nigel 3 {
2888     register int i;
2889     clock_t time_taken;
2890     clock_t start_time = clock();
2891 nigel 93 for (i = 0; i < timeit; i++)
2892 nigel 3 {
2893 ph10 836 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2894 nigel 3 if (re != NULL) free(re);
2895     }
2896     time_taken = clock() - start_time;
2897 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
2898     (((double)time_taken * 1000.0) / (double)timeit) /
2899 nigel 63 (double)CLOCKS_PER_SEC);
2900 nigel 3 }
2901    
2902 ph10 836 first_gotten_store = 0;
2903     PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2904 nigel 3
2905     /* Compilation failed; go back for another re, skipping to blank line
2906     if non-interactive. */
2907    
2908     if (re == NULL)
2909     {
2910     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2911     SKIP_DATA:
2912     if (infile != stdin)
2913     {
2914     for (;;)
2915     {
2916 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
2917 nigel 11 {
2918     done = 1;
2919     goto CONTINUE;
2920     }
2921 nigel 3 len = (int)strlen((char *)buffer);
2922     while (len > 0 && isspace(buffer[len-1])) len--;
2923     if (len == 0) break;
2924     }
2925     fprintf(outfile, "\n");
2926     }
2927 nigel 25 goto CONTINUE;
2928 nigel 3 }
2929 ph10 416
2930     /* Compilation succeeded. It is now possible to set the UTF-8 option from
2931     within the regex; check for this so that we know how to process the data
2932 ph10 412 lines. */
2933 ph10 416
2934 ph10 836 if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2935     goto SKIP_DATA;
2936     if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2937 nigel 3
2938 ph10 836 /* Extract the size for possible writing before possibly flipping it,
2939     and remember the store that was got. */
2940 nigel 3
2941 ph10 836 true_size = ((real_pcre *)re)->size;
2942     regex_gotten_store = first_gotten_store;
2943    
2944     /* Output code size information if requested */
2945    
2946 nigel 63 if (log_store)
2947     fprintf(outfile, "Memory allocation (code space): %d\n",
2948 ph10 836 (int)(first_gotten_store -
2949 nigel 63 sizeof(real_pcre) -
2950     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2951    
2952 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
2953 ph10 654 help with the matching, unless the pattern has the SS option, which
2954 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
2955     never sensible). */
2956 nigel 75
2957 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
2958 nigel 75 {
2959 nigel 93 if (timeit > 0)
2960 nigel 75 {
2961     register int i;
2962     clock_t time_taken;
2963     clock_t start_time = clock();
2964 nigel 93 for (i = 0; i < timeit; i++)
2965 ph10 836 {
2966     PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2967     }
2968 nigel 75 time_taken = clock() - start_time;
2969 ph10 836 if (extra != NULL)
2970     {
2971     PCRE_FREE_STUDY(extra);
2972     }
2973 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
2974     (((double)time_taken * 1000.0) / (double)timeit) /
2975 nigel 75 (double)CLOCKS_PER_SEC);
2976     }
2977 ph10 836 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2978 nigel 75 if (error != NULL)
2979     fprintf(outfile, "Failed to study: %s\n", error);
2980     else if (extra != NULL)
2981 ph10 836 {
2982 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2983 ph10 836 if (log_store)
2984     {
2985     size_t jitsize;
2986     if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2987     jitsize != 0)
2988     fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2989     }
2990     }
2991 nigel 75 }
2992 ph10 788
2993 ph10 510 /* If /K was present, we set up for handling MARK data. */
2994 ph10 512
2995 ph10 510 if (do_mark)
2996     {
2997     if (extra == NULL)
2998     {
2999     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3000     extra->flags = 0;
3001     }
3002 ph10 512 extra->mark = &markptr;
3003 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
3004 ph10 512 }
3005 nigel 75
3006 ph10 836 /* Extract and display information from the compiled data if required. */
3007 nigel 75
3008     SHOW_INFO:
3009    
3010 nigel 93 if (do_debug)
3011     {
3012     fprintf(outfile, "------------------------------------------------------------------\n");
3013 ph10 836 PCRE_PRINTINT(re, outfile, debug_lengths);
3014 nigel 93 }
3015 ph10 416
3016 ph10 412 /* We already have the options in get_options (see above) */
3017 nigel 93
3018 nigel 25 if (do_showinfo)
3019 nigel 3 {
3020 ph10 412 unsigned long int all_options;
3021 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
3022 ph10 227 hascrorlf;
3023 nigel 63 int nameentrysize, namecount;
3024 ph10 836 const pcre_uint8 *nametable;
3025 nigel 3
3026 ph10 836 if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3027     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3028     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3029     new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3030     new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3031     new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3032     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3033     new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3034     new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3035     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3036     new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3037     != 0)
3038     goto SKIP_DATA;
3039 nigel 43
3040 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
3041 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3042 nigel 77 (int)size, (int)regex_gotten_store);
3043 nigel 43
3044     fprintf(outfile, "Capturing subpattern count = %d\n", count);
3045     if (backrefmax > 0)
3046     fprintf(outfile, "Max back reference = %d\n", backrefmax);
3047 nigel 63
3048     if (namecount > 0)
3049     {
3050     fprintf(outfile, "Named capturing subpatterns:\n");
3051     while (namecount-- > 0)
3052     {
3053 ph10 836 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3054     int imm2_size = use_pcre16 ? 1 : 2;
3055     #else
3056     int imm2_size = IMM2_SIZE;
3057     #endif
3058     int length = (int)STRLEN(nametable + imm2_size);
3059     fprintf(outfile, " ");
3060     PCHARSV(nametable, imm2_size, length, outfile);
3061     while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3062     #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3063     fprintf(outfile, "%3d\n", use_pcre16?
3064     (int)(((PCRE_SPTR16)nametable)[0])
3065     :((int)nametable[0] << 8) | (int)nametable[1]);
3066     nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3067     #else
3068     fprintf(outfile, "%3d\n", GET2(nametable, 0));
3069     #ifdef SUPPORT_PCRE8
3070 nigel 63 nametable += nameentrysize;
3071 ph10 836 #else
3072     nametable += nameentrysize * 2;
3073     #endif
3074     #endif
3075 nigel 63 }
3076     }
3077 ph10 172
3078 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3079 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3080 nigel 63
3081 nigel 75 all_options = ((real_pcre *)re)->options;
3082 ph10 836 if (do_flip) all_options = swap_uint32(all_options);
3083 nigel 75
3084 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
3085 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3086 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3087     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3088     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3089     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3090 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3091 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3092 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3093     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3094 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3095     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3096     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3097 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3098 ph10 836 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3099 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3100 ph10 836 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3101 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3102 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3103 ph10 172
3104 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3105 nigel 43
3106 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
3107 nigel 91 {
3108     case PCRE_NEWLINE_CR:
3109     fprintf(outfile, "Forced newline sequence: CR\n");
3110     break;
3111 nigel 43
3112 nigel 91 case PCRE_NEWLINE_LF:
3113     fprintf(outfile, "Forced newline sequence: LF\n");
3114     break;
3115    
3116     case PCRE_NEWLINE_CRLF:
3117     fprintf(outfile, "Forced newline sequence: CRLF\n");
3118     break;
3119    
3120 ph10 149 case PCRE_NEWLINE_ANYCRLF:
3121     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3122     break;
3123    
3124 nigel 93 case PCRE_NEWLINE_ANY:
3125     fprintf(outfile, "Forced newline sequence: ANY\n");
3126     break;
3127    
3128 nigel 91 default:
3129     break;
3130     }
3131    
3132 nigel 43 if (first_char == -1)
3133     {
3134 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
3135 nigel 43 }
3136     else if (first_char < 0)
3137     {
3138     fprintf(outfile, "No first char\n");
3139     }
3140     else
3141     {
3142 ph10 836 const char *caseless =
3143     ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3144 nigel 63 "" : " (caseless)";
3145 ph10 836
3146     if (PRINTOK(first_char))
3147     fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3148 nigel 3 else
3149 ph10 836 {
3150     fprintf(outfile, "First char = ");
3151     pchar(first_char, outfile);
3152     fprintf(outfile, "%s\n", caseless);
3153     }
3154 nigel 43 }
3155 nigel 37
3156 nigel 43 if (need_char < 0)
3157     {
3158     fprintf(outfile, "No need char\n");
3159 nigel 3 }
3160 nigel 43 else
3161     {
3162 ph10 836 const char *caseless =
3163     ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3164 nigel 63 "" : " (caseless)";
3165 ph10 836
3166     if (PRINTOK(need_char))
3167     fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3168 nigel 43 else
3169 ph10 836 {
3170     fprintf(outfile, "Need char = ");
3171     pchar(need_char, outfile);
3172     fprintf(outfile, "%s\n", caseless);
3173     }
3174 nigel 43 }
3175 nigel 75
3176     /* Don't output study size; at present it is in any case a fixed
3177     value, but it varies, depending on the computer architecture, and
3178     so messes up the test suite. (And with the /F option, it might be
3179 ph10 654 flipped.) If study was forced by an external -s, don't show this
3180 ph10 612 information unless -i or -d was also present. This means that, except
3181     when auto-callouts are involved, the output from runs with and without
3182     -s should be identical. */
3183 nigel 75
3184 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3185 nigel 75 {
3186     if (extra == NULL)
3187     fprintf(outfile, "Study returned NULL\n");
3188     else
3189     {
3190 ph10 836 pcre_uint8 *start_bits = NULL;
3191 ph10 455 int minlength;
3192 ph10 461
3193 ph10 836 if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3194     fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3195 ph10 461
3196 ph10 836 if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3197 nigel 75 {
3198 ph10 836 if (start_bits == NULL)
3199     fprintf(outfile, "No set of starting bytes\n");
3200     else
3201 nigel 75 {
3202 ph10 836 int i;
3203     int c = 24;
3204     fprintf(outfile, "Starting byte set: ");
3205     for (i = 0; i < 256; i++)
3206 nigel 75 {
3207 ph10 836 if ((start_bits[i/8] & (1<<(i&7))) != 0)
3208 nigel 75 {
3209 ph10 836 if (c > 75)
3210     {
3211     fprintf(outfile, "\n ");
3212     c = 2;
3213     }
3214     if (PRINTOK(i) && i != ' ')
3215     {
3216     fprintf(outfile, "%c ", i);
3217     c += 2;
3218     }
3219     else
3220     {
3221     fprintf(outfile, "\\x%02x ", i);
3222     c += 5;
3223     }
3224 nigel 75 }
3225     }
3226 ph10 836 fprintf(outfile, "\n");
3227 nigel 75 }
3228     }
3229     }
3230 ph10 691
3231 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
3232 ph10 691
3233 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3234     {
3235 ph10 691 int jit;
3236 ph10 836 if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3237     {
3238     if (jit)
3239     fprintf(outfile, "JIT study was successful\n");
3240     else
3241 ph10 691 #ifdef SUPPORT_JIT
3242 ph10 836 fprintf(outfile, "JIT study was not successful\n");
3243 ph10 667 #else
3244 ph10 836 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3245 ph10 667 #endif
3246 ph10 836 }
3247 ph10 691 }
3248 nigel 75 }
3249 nigel 3 }
3250    
3251 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
3252     that is all. The first 8 bytes of the file are the regex length and then
3253     the study length, in big-endian order. */
3254 nigel 3
3255 nigel 75 if (to_file != NULL)
3256 nigel 3 {
3257 nigel 75 FILE *f = fopen((char *)to_file, "wb");
3258     if (f == NULL)
3259 nigel 3 {
3260 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3261 nigel 3 }
3262 nigel 75 else
3263     {
3264 ph10 836 pcre_uint8 sbuf[8];
3265 ph10 259
3266 ph10 836 if (do_flip) regexflip(re, extra);
3267     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3268     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3269     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
3270     sbuf[3] = (pcre_uint8)((true_size) & 255);
3271     sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3272     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3273     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
3274     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3275 nigel 3
3276 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
3277     fwrite(re, 1, true_size, f) < true_size)
3278     {
3279     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3280     }
3281 nigel 3 else
3282     {
3283 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3284 ph10 654
3285 ph10 658 /* If there is study data, write it. */
3286 ph10 654
3287 nigel 75 if (extra != NULL)
3288 nigel 3 {
3289 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
3290     true_study_size)
3291 nigel 3 {
3292 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
3293     strerror(errno));
3294 nigel 3 }
3295 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
3296 nigel 3 }
3297     }
3298 nigel 75 fclose(f);
3299 nigel 3 }
3300 nigel 77
3301     new_free(re);
3302 ph10 836 if (extra != NULL)
3303     {
3304     PCRE_FREE_STUDY(extra);
3305     }
3306 ph10 545 if (locale_set)
3307 ph10 541 {
3308     new_free((void *)tables);
3309     setlocale(LC_CTYPE, "C");
3310 ph10 545 locale_set = 0;
3311     }
3312 nigel 75 continue; /* With next regex */
3313 nigel 3 }
3314 nigel 75 } /* End of non-POSIX compile */
3315 nigel 3
3316     /* Read data lines and test them */
3317    
3318     for (;;)
3319     {
3320 ph10 836 pcre_uint8 *q;
3321     pcre_uint8 *bptr;
3322 nigel 57 int *use_offsets = offsets;
3323 nigel 53 int use_size_offsets = size_offsets;
3324 nigel 63 int callout_data = 0;
3325     int callout_data_set = 0;
3326 nigel 3 int count, c;
3327 nigel 29 int copystrings = 0;
3328 ph10 386 int find_match_limit = default_find_match_limit;
3329 nigel 29 int getstrings = 0;
3330     int getlist = 0;
3331 nigel 39 int gmatched = 0;
3332 nigel 35 int start_offset = 0;
3333 ph10 579 int start_offset_sign = 1;
3334 nigel 41 int g_notempty = 0;
3335 nigel 77 int use_dfa = 0;
3336 nigel 3
3337 nigel 91 *copynames = 0;
3338     *getnames = 0;
3339    
3340 ph10 836 cn16ptr = copynames;
3341     gn16ptr = getnames;
3342     cn8ptr = copynames8;
3343     gn8ptr = getnames8;
3344 nigel 91
3345 ph10 836 SET_PCRE_CALLOUT(callout);
3346 nigel 63 first_callout = 1;
3347 ph10 654 last_callout_mark = NULL;
3348 nigel 63 callout_extra = 0;
3349     callout_count = 0;
3350     callout_fail_count = 999999;
3351     callout_fail_id = -1;
3352 nigel 73 show_malloc = 0;
3353 ph10 836 options = 0;
3354 nigel 63
3355 nigel 91 if (extra != NULL) extra->flags &=
3356     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3357    
3358     len = 0;
3359     for (;;)
3360 nigel 11 {
3361 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3362 nigel 91 {
3363 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
3364     {
3365 ph10 545 fprintf(outfile, "\n");
3366 ph10 537 break;
3367 ph10 545 }
3368 nigel 91 done = 1;
3369     goto CONTINUE;
3370     }
3371     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3372     len = (int)strlen((char *)buffer);
3373     if (buffer[len-1] == '\n') break;
3374 nigel 11 }
3375 nigel 3
3376     while (len > 0 && isspace(buffer[len-1])) len--;
3377     buffer[len] = 0;
3378     if (len == 0) break;
3379    
3380     p = buffer;
3381     while (isspace(*p)) p++;
3382    
3383 ph10 147 bptr = q = dbuffer;
3384 nigel 3 while ((c = *p++) != 0)
3385     {
3386     int i = 0;
3387     int n = 0;
3388 ph10 842
3389 ph10 836 /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3390     In non-UTF mode, allow the value of the byte to fall through to later,
3391     where values greater than 127 are turned into UTF-8 when running in
3392     16-bit mode. */
3393 ph10 842
3394 ph10 836 if (c != '\\')
3395 nigel 3 {
3396 ph10 836 if (use_utf)
3397     {
3398     *q++ = c;
3399     continue;
3400 ph10 842 }
3401     }
3402    
3403 ph10 836 /* Handle backslash escapes */
3404 ph10 842
3405 ph10 836 else switch ((c = *p++))
3406     {
3407 nigel 3 case 'a': c = 7; break;
3408     case 'b': c = '\b'; break;
3409     case 'e': c = 27; break;
3410     case 'f': c = '\f'; break;
3411     case 'n': c = '\n'; break;
3412     case 'r': c = '\r'; break;
3413     case 't': c = '\t'; break;
3414     case 'v': c = '\v'; break;
3415    
3416     case '0': case '1': case '2': case '3':
3417     case '4': case '5': case '6': case '7':
3418     c -= '0';
3419     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3420     c = c * 8 + *p++ - '0';
3421     break;
3422    
3423     case 'x':
3424 nigel 49 if (*p == '{')
3425     {
3426 ph10 836 pcre_uint8 *pt = p;
3427 nigel 49 c = 0;
3428 ph10 738
3429 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3430     when isxdigit() is a macro that refers to its argument more than
3431     once. This is banned by the C Standard, but apparently happens in at
3432     least one MacOS environment. */
3433 ph10 738
3434 ph10 735 for (pt++; isxdigit(*pt); pt++)
3435 ph10 734 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3436 nigel 49 if (*pt == '}')
3437     {
3438     p = pt + 1;
3439     break;
3440     }
3441 ph10 836 /* Not correct form for \x{...}; fall through */
3442 nigel 49 }
3443    
3444 ph10 842 /* \x without {} always defines just one byte in 8-bit mode. This
3445     allows UTF-8 characters to be constructed byte by byte, and also allows
3446     invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3447     Otherwise, pass it down to later code so that it can be turned into
3448 ph10 836 UTF-8 when running in 16-bit mode. */
3449 nigel 49
3450 nigel 3 c = 0;
3451     while (i++ < 2 && isxdigit(*p))
3452     {
3453 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3454 nigel 3 p++;
3455     }
3456 ph10 836 if (use_utf)
3457 ph10 842 {
3458 ph10 836 *q++ = c;
3459 ph10 842 continue;
3460     }
3461 nigel 3 break;
3462    
3463 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
3464 nigel 3 p--;
3465     continue;
3466    
3467 nigel 75 case '>':
3468 ph10 579 if (*p == '-')
3469 ph10 567 {
3470     start_offset_sign = -1;
3471     p++;
3472 ph10 579 }
3473 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3474 ph10 579 start_offset *= start_offset_sign;
3475 nigel 75 continue;
3476    
3477 nigel 3 case 'A': /* Option setting */
3478     options |= PCRE_ANCHORED;
3479     continue;
3480    
3481     case 'B':
3482     options |= PCRE_NOTBOL;
3483     continue;
3484    
3485 nigel 29 case 'C':
3486 nigel 63 if (isdigit(*p)) /* Set copy string */
3487     {
3488     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3489     copystrings |= 1 << n;
3490     }
3491     else if (isalnum(*p))
3492     {
3493 ph10 836 READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3494 nigel 63 }
3495     else if (*p == '+')
3496     {
3497     callout_extra = 1;
3498     p++;
3499     }
3500     else if (*p == '-')
3501     {
3502 ph10 836 SET_PCRE_CALLOUT(NULL);
3503 nigel 63 p++;
3504     }
3505     else if (*p == '!')
3506     {
3507     callout_fail_id = 0;
3508     p++;
3509     while(isdigit(*p))
3510     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3511     callout_fail_count = 0;
3512     if (*p == '!')
3513     {
3514     p++;
3515     while(isdigit(*p))
3516     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3517     }
3518     }
3519     else if (*p == '*')
3520     {
3521     int sign = 1;
3522     callout_data = 0;
3523     if (*(++p) == '-') { sign = -1; p++; }
3524     while(isdigit(*p))
3525     callout_data = callout_data * 10 + *p++ - '0';
3526     callout_data *= sign;
3527     callout_data_set = 1;
3528     }
3529 nigel 29 continue;
3530    
3531 nigel 79 #if !defined NODFA
3532 nigel 77 case 'D':
3533 nigel 79 #if !defined NOPOSIX
3534 nigel 77 if (posix || do_posix)
3535     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3536     else
3537 nigel 79 #endif
3538 nigel 77 use_dfa = 1;
3539     continue;
3540 ph10 553 #endif
3541 nigel 77
3542 ph10 553 #if !defined NODFA
3543 nigel 77 case 'F':
3544     options |= PCRE_DFA_SHORTEST;
3545     continue;
3546 nigel 79 #endif
3547 nigel 77
3548 nigel 29 case 'G':
3549 nigel 63 if (isdigit(*p))
3550     {
3551     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3552     getstrings |= 1 << n;
3553     }
3554     else if (isalnum(*p))
3555     {
3556 ph10 836 READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3557 nigel 63 }
3558 nigel 29 continue;
3559 ph10 691
3560 ph10 667 case 'J':
3561     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3562 ph10 691 if (extra != NULL
3563     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3564 ph10 667 && extra->executable_jit != NULL)
3565 ph10 691 {
3566 ph10 836 if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3567     jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3568     PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3569 ph10 691 }
3570 ph10 667 continue;
3571 nigel 29
3572     case 'L':
3573     getlist = 1;
3574     continue;
3575    
3576 nigel 63 case 'M':
3577     find_match_limit = 1;
3578     continue;
3579    
3580 nigel 37 case 'N':
3581 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
3582     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3583 ph10 461 else
3584 ph10 442 options |= PCRE_NOTEMPTY;
3585 nigel 37 continue;
3586    
3587 nigel 3 case 'O':
3588     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3589 nigel 53 if (n > size_offsets_max)
3590     {
3591     size_offsets_max = n;
3592 nigel 57 free(offsets);
3593 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3594 nigel 53 if (offsets == NULL)
3595     {
3596     printf("** Failed to get %d bytes of memory for offsets vector\n",
3597 ph10 151 (int)(size_offsets_max * sizeof(int)));
3598 nigel 77 yield = 1;
3599     goto EXIT;
3600 nigel 53 }
3601     }
3602     use_size_offsets = n;
3603 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
3604 nigel 3 continue;
3605    
3606 nigel 75 case 'P':
3607 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3608 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3609 nigel 75 continue;
3610    
3611 nigel 91 case 'Q':
3612     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3613     if (extra == NULL)
3614     {
3615     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3616     extra->flags = 0;
3617     }
3618     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3619     extra->match_limit_recursion = n;
3620     continue;
3621    
3622     case 'q':
3623     while(isdigit(*p)) n = n * 10 + *p++ - '0';
3624     if (extra == NULL)
3625     {
3626     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3627     extra->flags = 0;
3628     }
3629     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3630     extra->match_limit = n;
3631     continue;
3632    
3633 nigel 79 #if !defined NODFA
3634 nigel 77 case 'R':
3635     options |= PCRE_DFA_RESTART;
3636     continue;
3637 nigel 79 #endif
3638 nigel 77
3639 nigel 73 case 'S':
3640     show_malloc = 1;
3641     continue;
3642 ph10 392
3643 ph10 389 case 'Y':
3644     options |= PCRE_NO_START_OPTIMIZE;
3645 ph10 392 continue;
3646 nigel 73
3647 nigel 3 case 'Z':
3648     options |= PCRE_NOTEOL;
3649     continue;
3650 nigel 71
3651     case '?':
3652     options |= PCRE_NO_UTF8_CHECK;
3653     continue;
3654 nigel 91
3655     case '<':
3656     {
3657     int x = check_newline(p, outfile);
3658     if (x == 0) goto NEXT_DATA;
3659     options |= x;
3660     while (*p++ != '>');
3661     }
3662     continue;
3663 nigel 3 }
3664 ph10 836
3665 ph10 842 /* We now have a character value in c that may be greater than 255. In
3666     16-bit mode, we always convert characters to UTF-8 so that values greater
3667 ph10 836 than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3668 ph10 842 convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3669 ph10 836 mode must have come from \x{...} or octal constructs because values from
3670     \x.. get this far only in non-UTF mode. */
3671    
3672 ph10 842 #if !defined NOUTF || defined SUPPORT_PCRE16
3673 ph10 836 if (use_pcre16 || use_utf)
3674     {
3675     pcre_uint8 buff8[8];
3676     int ii, utn;
3677     utn = ord2utf8(c, buff8);
3678     for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3679     }
3680     else
3681 ph10 842 #endif
3682 ph10 836 {
3683     if (c > 255)
3684     {
3685     fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3686     "and UTF-8 mode is not enabled.\n", c);
3687     fprintf(outfile, "** Truncation will probably give the wrong "
3688     "result.\n");
3689     }
3690     *q++ = c;
3691     }
3692 nigel 3 }
3693 ph10 842
3694 ph10 836 /* Reached end of subject string */
3695 ph10 842
3696 nigel 9 *q = 0;
3697 ph10 530 len = (int)(q - dbuffer);
3698 ph10 545
3699 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
3700 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3701 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
3702 ph10 371
3703 ph10 363 #if !defined NOPOSIX
3704     if (posix || do_posix)
3705     {
3706     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3707 ph10 371 bptr += buffer_size - len - 1;
3708 ph10 363 }
3709 ph10 371 else
3710     #endif
3711 ph10 363 {
3712     memmove(bptr + buffer_size - len, bptr, len);
3713 ph10 371 bptr += buffer_size - len;
3714     }
3715 nigel 3
3716 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
3717     {
3718     printf("**Match limit not relevant for DFA matching: ignored\n");
3719     find_match_limit = 0;
3720     }
3721    
3722 nigel 3 /* Handle matching via the POSIX interface, which does not
3723 nigel 63 support timing or playing with the match limit or callout data. */
3724 nigel 3
3725 nigel 37 #if !defined NOPOSIX
3726 nigel 3 if (posix || do_posix)
3727     {
3728     int rc;
3729     int eflags = 0;
3730 nigel 63 regmatch_t *pmatch = NULL;
3731     if (use_size_offsets > 0)
3732 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3733 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3734     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3735 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3736 nigel 3
3737 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3738 nigel 3
3739     if (rc != 0)
3740     {
3741 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3742 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3743     }
3744 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3745     != 0)
3746     {
3747     fprintf(outfile, "Matched with REG_NOSUB\n");
3748     }
3749 nigel 3 else
3750     {
3751 nigel 7 size_t i;
3752 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
3753 nigel 3 {
3754     if (pmatch[i].rm_so >= 0)
3755     {
3756 nigel 23 fprintf(outfile, "%2d: ", (int)i);
3757 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_so,
3758 nigel 63 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3759 nigel 3 fprintf(outfile, "\n");
3760 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3761 nigel 35 {
3762 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
3763 ph10 836 PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3764 nigel 63 outfile);
3765 nigel 35 fprintf(outfile, "\n");
3766     }
3767 nigel 3 }
3768     }
3769     }
3770 nigel 53 free(pmatch);
3771 ph10 836 goto NEXT_DATA;
3772 nigel 3 }
3773    
3774 ph10 836 #endif /* !defined NOPOSIX */
3775    
3776 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
3777 nigel 3
3778 ph10 836 #ifdef SUPPORT_PCRE16
3779     if (use_pcre16)
3780     {
3781     len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3782     switch(len)
3783     {
3784     case -1:
3785     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3786     "converted to UTF-16\n");
3787     goto NEXT_DATA;
3788 nigel 37
3789 ph10 836 case -2:
3790     fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3791     "cannot be converted to UTF-16\n");
3792     goto NEXT_DATA;
3793    
3794     case -3:
3795     fprintf(outfile, "**Failed: character value greater than 0xffff "
3796     "cannot be converted to 16-bit in non-UTF mode\n");
3797 ph10 842 goto NEXT_DATA;
3798 ph10 836
3799     default:
3800     break;
3801     }
3802     bptr = (pcre_uint8 *)buffer16;
3803     }
3804     #endif
3805    
3806 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
3807 nigel 3 {
3808 ph10 512 markptr = NULL;
3809    
3810 nigel 93 if (timeitm > 0)
3811 nigel 3 {
3812     register int i;
3813     clock_t time_taken;
3814     clock_t start_time = clock();
3815 nigel 77
3816 nigel 79 #if !defined NODFA
3817 nigel 77 if (all_use_dfa || use_dfa)
3818     {
3819     int workspace[1000];
3820 nigel 93 for (i = 0; i < timeitm; i++)
3821 ph10 836 {
3822     PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3823     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3824     (sizeof(workspace)/sizeof(int)));
3825     }
3826 nigel 77 }
3827     else
3828 nigel 79 #endif
3829 nigel 77
3830 nigel 93 for (i = 0; i < timeitm; i++)
3831 ph10 836 {
3832     PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3833     (options | g_notempty), use_offsets, use_size_offsets);
3834     }
3835 nigel 3 time_taken = clock() - start_time;
3836 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
3837     (((double)time_taken * 1000.0) / (double)timeitm) /
3838 nigel 63 (double)CLOCKS_PER_SEC);
3839 nigel 3 }
3840    
3841 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
3842 nigel 87 varying limits in order to find the minimum value for the match limit and
3843 ph10 667 for the recursion limit. The match limits are relevant only to the normal
3844     running of pcre_exec(), so disable the JIT optimization. This makes it
3845     possible to run the same set of tests with and without JIT externally
3846     requested. */
3847 nigel 63
3848     if (find_match_limit)
3849     {
3850     if (extra == NULL)
3851     {
3852 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3853 nigel 63 extra->flags = 0;
3854     }
3855 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3856 ph10 691
3857 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
3858 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
3859     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3860     PCRE_ERROR_MATCHLIMIT, "match()");
3861 nigel 63
3862 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
3863     options|g_notempty, use_offsets, use_size_offsets,
3864     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3865     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3866 nigel 63 }
3867    
3868     /* If callout_data is set, use the interface with additional data */
3869    
3870     else if (callout_data_set)
3871     {
3872     if (extra == NULL)
3873     {
3874 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3875 nigel 63 extra->flags = 0;
3876     }
3877     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3878 nigel 71 extra->callout_data = &callout_data;
3879 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3880 nigel 63 options | g_notempty, use_offsets, use_size_offsets);
3881     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3882     }
3883    
3884     /* The normal case is just to do the match once, with the default
3885     value of match_limit. */
3886    
3887 nigel 79 #if !defined NODFA
3888 nigel 77 else if (all_use_dfa || use_dfa)
3889     {
3890     int workspace[1000];
3891 ph10 836 PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3892     (options | g_notempty), use_offsets, use_size_offsets, workspace,
3893     (sizeof(workspace)/sizeof(int)));
3894 nigel 77 if (count == 0)
3895     {
3896     fprintf(outfile, "Matched, but too many subsidiary matches\n");
3897     count = use_size_offsets/2;
3898     }
3899     }
3900 nigel 79 #endif
3901 nigel 77
3902 nigel 75 else
3903     {
3904 ph10 836 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3905     options | g_notempty, use_offsets, use_size_offsets);
3906 nigel 77 if (count == 0)
3907     {
3908     fprintf(outfile, "Matched, but too many substrings\n");
3909     count = use_size_offsets/3;
3910     }
3911 nigel 75 }
3912 nigel 3
3913 nigel 39 /* Matched */
3914    
3915 nigel 3 if (count >= 0)
3916     {
3917 nigel 93 int i, maxcount;
3918 ph10 836 void *cnptr, *gnptr;
3919 nigel 93
3920     #if !defined NODFA
3921     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3922     #endif
3923     maxcount = use_size_offsets/3;
3924    
3925     /* This is a check against a lunatic return value. */
3926    
3927     if (count > maxcount)
3928     {
3929     fprintf(outfile,
3930     "** PCRE error: returned count %d is too big for offset size %d\n",
3931     count, use_size_offsets);
3932     count = use_size_offsets/3;
3933     if (do_g || do_G)
3934     {
3935     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3936     do_g = do_G = FALSE; /* Break g/G loop */
3937     }
3938     }
3939 ph10 654
3940 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
3941     unset ones at the end. */
3942 ph10 654
3943 ph10 626 if (do_allcaps)
3944     {
3945 ph10 836 if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3946     goto SKIP_DATA;
3947 ph10 654 count++; /* Allow for full match */
3948     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3949     }
3950 nigel 93
3951 ph10 626 /* Output the captured substrings */
3952 ph10 654
3953 nigel 29 for (i = 0; i < count * 2; i += 2)
3954 nigel 3 {
3955 nigel 57 if (use_offsets[i] < 0)
3956 ph10 654 {
3957 ph10 626 if (use_offsets[i] != -1)
3958     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3959 ph10 654 use_offsets[i], i);
3960 ph10 626 if (use_offsets[i+1] != -1)
3961     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3962 ph10 654 use_offsets[i+1], i+1);
3963 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
3964 ph10 654 }
3965 nigel 3 else
3966     {
3967     fprintf(outfile, "%2d: ", i/2);
3968 ph10 836 PCHARSV(bptr, use_offsets[i],
3969 nigel 63 use_offsets[i+1] - use_offsets[i], outfile);
3970 nigel 3 fprintf(outfile, "\n");
3971 ph10