/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 9 by nigel, Sat Feb 24 21:38:13 2007 UTC revision 975 by ph10, Sat Jun 2 11:03:06 2012 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    #ifdef HAVE_CONFIG_H
50    #include "config.h"
51    #endif
52    
53  #include <ctype.h>  #include <ctype.h>
54  #include <stdio.h>  #include <stdio.h>
55  #include <string.h>  #include <string.h>
56  #include <stdlib.h>  #include <stdlib.h>
57  #include <time.h>  #include <time.h>
58    #include <locale.h>
59    #include <errno.h>
60    
61    /* Both libreadline and libedit are optionally supported. The user-supplied
62    original patch uses readline/readline.h for libedit, but in at least one system
63    it is installed as editline/readline.h, so the configuration code now looks for
64    that first, falling back to readline/readline.h. */
65    
66    #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
67    #ifdef HAVE_UNISTD_H
68    #include <unistd.h>
69    #endif
70    #if defined(SUPPORT_LIBREADLINE)
71    #include <readline/readline.h>
72    #include <readline/history.h>
73    #else
74    #if defined(HAVE_EDITLINE_READLINE_H)
75    #include <editline/readline.h>
76    #else
77    #include <readline/readline.h>
78    #endif
79    #endif
80    #endif
81    
82    /* A number of things vary for Windows builds. Originally, pcretest opened its
83    input and output without "b"; then I was told that "b" was needed in some
84    environments, so it was added for release 5.0 to both the input and output. (It
85    makes no difference on Unix-like systems.) Later I was told that it is wrong
86    for the input on Windows. I've now abstracted the modes into two macros that
87    are set here, to make it easier to fiddle with them, and removed "b" from the
88    input mode under Windows. */
89    
90    #if defined(_WIN32) || defined(WIN32)
91    #include <io.h>                /* For _setmode() */
92    #include <fcntl.h>             /* For _O_BINARY */
93    #define INPUT_MODE   "r"
94    #define OUTPUT_MODE  "wb"
95    
96    #ifndef isatty
97    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
98    #endif                         /* though in some environments they seem to   */
99                                   /* be already defined, hence the #ifndefs.    */
100    #ifndef fileno
101    #define fileno _fileno
102    #endif
103    
104    /* A user sent this fix for Borland Builder 5 under Windows. */
105    
106    #ifdef __BORLANDC__
107    #define _setmode(handle, mode) setmode(handle, mode)
108    #endif
109    
110    /* Not Windows */
111    
112    #else
113    #include <sys/time.h>          /* These two includes are needed */
114    #include <sys/resource.h>      /* for setrlimit(). */
115    #define INPUT_MODE   "rb"
116    #define OUTPUT_MODE  "wb"
117    #endif
118    
119    #define PRIV(name) name
120    
121    /* We have to include pcre_internal.h because we need the internal info for
122    displaying the results of pcre_study() and we also need to know about the
123    internal macros, structures, and other internal data values; pcretest has
124    "inside information" compared to a program that strictly follows the PCRE API.
125    
126    Although pcre_internal.h does itself include pcre.h, we explicitly include it
127    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
128    appropriately for an application, not for building PCRE. */
129    
130    #include "pcre.h"
131    
132    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
133    /* Configure internal macros to 16 bit mode. */
134    #define COMPILE_PCRE16
135    #endif
136    
137    #include "pcre_internal.h"
138    
139    /* The pcre_printint() function, which prints the internal form of a compiled
140    regex, is held in a separate file so that (a) it can be compiled in either
141    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
142    when that is compiled in debug mode. */
143    
144    #ifdef SUPPORT_PCRE8
145    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
146    #endif
147    #ifdef SUPPORT_PCRE16
148    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
149    #endif
150    
151    /* We need access to some of the data tables that PCRE uses. So as not to have
152    to keep two copies, we include the source file here, changing the names of the
153    external symbols to prevent clashes. */
154    
155    #define PCRE_INCLUDED
156    
157    #include "pcre_tables.c"
158    
159    /* The definition of the macro PRINTABLE, which determines whether to print an
160    output character as-is or as a hex value when showing compiled patterns, is
161    the same as in the printint.src file. We use it here in cases when the locale
162    has not been explicitly changed, so as to get consistent output from systems
163    that differ in their output from isprint() even in the "C" locale. */
164    
165    #ifdef EBCDIC
166    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
167    #else
168    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
169    #endif
170    
171    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
172    
173  /* Use the internal info for displaying the results of pcre_study(). */  /* Posix support is disabled in 16 bit only mode. */
174    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
175    #define NOPOSIX
176    #endif
177    
178    /* It is possible to compile this test program without including support for
179    testing the POSIX interface, though this is not available via the standard
180    Makefile. */
181    
182  #include "internal.h"  #if !defined NOPOSIX
183  #include "pcreposix.h"  #include "pcreposix.h"
184    #endif
185    
186    /* It is also possible, originally for the benefit of a version that was
187    imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
188    NOUTF), or without the interface to the DFA matcher (define NODFA). In fact, we
189    automatically cut out the UTF support if PCRE is built without it. */
190    
191    #ifndef SUPPORT_UTF
192    #ifndef NOUTF
193    #define NOUTF
194    #endif
195    #endif
196    
197    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
198    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
199    only from one place and is handled differently). I couldn't dream up any way of
200    using a single macro to do this in a generic way, because of the many different
201    argument requirements. We know that at least one of SUPPORT_PCRE8 and
202    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
203    use these in the definitions of generic macros.
204    
205    **** Special note about the PCHARSxxx macros: the address of the string to be
206    printed is always given as two arguments: a base address followed by an offset.
207    The base address is cast to the correct data size for 8 or 16 bit data; the
208    offset is in units of this size. If the string were given as base+offset in one
209    argument, the casting might be incorrectly applied. */
210    
211    #ifdef SUPPORT_PCRE8
212    
213    #define PCHARS8(lv, p, offset, len, f) \
214      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
215    
216    #define PCHARSV8(p, offset, len, f) \
217      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
218    
219    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
220      p = read_capture_name8(p, cn8, re)
221    
222    #define STRLEN8(p) ((int)strlen((char *)p))
223    
224    #define SET_PCRE_CALLOUT8(callout) \
225      pcre_callout = callout
226    
227    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
228       pcre_assign_jit_stack(extra, callback, userdata)
229    
230    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
231      re = pcre_compile((char *)pat, options, error, erroffset, tables)
232    
233    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
234        namesptr, cbuffer, size) \
235      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
236        (char *)namesptr, cbuffer, size)
237    
238    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
239      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
240    
241    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
242        offsets, size_offsets, workspace, size_workspace) \
243      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
244        offsets, size_offsets, workspace, size_workspace)
245    
246    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
247        offsets, size_offsets) \
248      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
249        offsets, size_offsets)
250    
251    #define PCRE_FREE_STUDY8(extra) \
252      pcre_free_study(extra)
253    
254    #define PCRE_FREE_SUBSTRING8(substring) \
255      pcre_free_substring(substring)
256    
257    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
258      pcre_free_substring_list(listptr)
259    
260    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
261        getnamesptr, subsptr) \
262      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
263        (char *)getnamesptr, subsptr)
264    
265    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
266      n = pcre_get_stringnumber(re, (char *)ptr)
267    
268    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
269      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
270    
271    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
272      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
273    
274    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
275      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
276    
277    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
278      pcre_printint(re, outfile, debug_lengths)
279    
280    #define PCRE_STUDY8(extra, re, options, error) \
281      extra = pcre_study(re, options, error)
282    
283    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
284      pcre_jit_stack_alloc(startsize, maxsize)
285    
286    #define PCRE_JIT_STACK_FREE8(stack) \
287      pcre_jit_stack_free(stack)
288    
289    #endif /* SUPPORT_PCRE8 */
290    
291    /* -----------------------------------------------------------*/
292    
293    #ifdef SUPPORT_PCRE16
294    
295    #define PCHARS16(lv, p, offset, len, f) \
296      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
297    
298    #define PCHARSV16(p, offset, len, f) \
299      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
300    
301    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
302      p = read_capture_name16(p, cn16, re)
303    
304    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
305    
306    #define SET_PCRE_CALLOUT16(callout) \
307      pcre16_callout = (int (*)(pcre16_callout_block *))callout
308    
309    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
310      pcre16_assign_jit_stack((pcre16_extra *)extra, \
311        (pcre16_jit_callback)callback, userdata)
312    
313    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
314      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
315        tables)
316    
317    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
318        namesptr, cbuffer, size) \
319      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
320        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
321    
322    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
323      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
324        (PCRE_UCHAR16 *)cbuffer, size/2)
325    
326    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
327        offsets, size_offsets, workspace, size_workspace) \
328      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
329        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
330        workspace, size_workspace)
331    
332    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
333        offsets, size_offsets) \
334      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
335        len, start_offset, options, offsets, size_offsets)
336    
337    #define PCRE_FREE_STUDY16(extra) \
338      pcre16_free_study((pcre16_extra *)extra)
339    
340    #define PCRE_FREE_SUBSTRING16(substring) \
341      pcre16_free_substring((PCRE_SPTR16)substring)
342    
343    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
344      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
345    
346    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
347        getnamesptr, subsptr) \
348      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
349        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
350    
351    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
352      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
353    
354    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
355      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
356        (PCRE_SPTR16 *)(void*)subsptr)
357    
358    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
359      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
360        (PCRE_SPTR16 **)(void*)listptr)
361    
362    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
363      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
364        tables)
365    
366    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
367      pcre16_printint(re, outfile, debug_lengths)
368    
369    #define PCRE_STUDY16(extra, re, options, error) \
370      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
371    
372    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
373      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
374    
375    #define PCRE_JIT_STACK_FREE16(stack) \
376      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
377    
378    #endif /* SUPPORT_PCRE16 */
379    
380    
381    /* ----- Both modes are supported; a runtime test is needed, except for
382    pcre_config(), and the JIT stack functions, when it doesn't matter which
383    version is called. ----- */
384    
385    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
386    
387    #define CHAR_SIZE (use_pcre16? 2:1)
388    
389    #define PCHARS(lv, p, offset, len, f) \
390      if (use_pcre16) \
391        PCHARS16(lv, p, offset, len, f); \
392      else \
393        PCHARS8(lv, p, offset, len, f)
394    
395    #define PCHARSV(p, offset, len, f) \
396      if (use_pcre16) \
397        PCHARSV16(p, offset, len, f); \
398      else \
399        PCHARSV8(p, offset, len, f)
400    
401    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
402      if (use_pcre16) \
403        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
404      else \
405        READ_CAPTURE_NAME8(p, cn8, cn16, re)
406    
407    #define SET_PCRE_CALLOUT(callout) \
408      if (use_pcre16) \
409        SET_PCRE_CALLOUT16(callout); \
410      else \
411        SET_PCRE_CALLOUT8(callout)
412    
413    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
414    
415    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
416      if (use_pcre16) \
417        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
418      else \
419        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
420    
421    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
422      if (use_pcre16) \
423        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
424      else \
425        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
426    
427    #define PCRE_CONFIG pcre_config
428    
429    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
430        namesptr, cbuffer, size) \
431      if (use_pcre16) \
432        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
433          namesptr, cbuffer, size); \
434      else \
435        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
436          namesptr, cbuffer, size)
437    
438    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
439      if (use_pcre16) \
440        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
441      else \
442        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
443    
444    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
445        offsets, size_offsets, workspace, size_workspace) \
446      if (use_pcre16) \
447        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
448          offsets, size_offsets, workspace, size_workspace); \
449      else \
450        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
451          offsets, size_offsets, workspace, size_workspace)
452    
453    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
454        offsets, size_offsets) \
455      if (use_pcre16) \
456        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
457          offsets, size_offsets); \
458      else \
459        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
460          offsets, size_offsets)
461    
462    #define PCRE_FREE_STUDY(extra) \
463      if (use_pcre16) \
464        PCRE_FREE_STUDY16(extra); \
465      else \
466        PCRE_FREE_STUDY8(extra)
467    
468    #define PCRE_FREE_SUBSTRING(substring) \
469      if (use_pcre16) \
470        PCRE_FREE_SUBSTRING16(substring); \
471      else \
472        PCRE_FREE_SUBSTRING8(substring)
473    
474    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
475      if (use_pcre16) \
476        PCRE_FREE_SUBSTRING_LIST16(listptr); \
477      else \
478        PCRE_FREE_SUBSTRING_LIST8(listptr)
479    
480    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
481        getnamesptr, subsptr) \
482      if (use_pcre16) \
483        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
484          getnamesptr, subsptr); \
485      else \
486        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
487          getnamesptr, subsptr)
488    
489    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
490      if (use_pcre16) \
491        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
492      else \
493        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
494    
495    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
496      if (use_pcre16) \
497        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
498      else \
499        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
500    
501    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
502      if (use_pcre16) \
503        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
504      else \
505        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
506    
507    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
508      (use_pcre16 ? \
509         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
510        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
511    
512    #define PCRE_JIT_STACK_FREE(stack) \
513      if (use_pcre16) \
514        PCRE_JIT_STACK_FREE16(stack); \
515      else \
516        PCRE_JIT_STACK_FREE8(stack)
517    
518    #define PCRE_MAKETABLES \
519      (use_pcre16? pcre16_maketables() : pcre_maketables())
520    
521    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
522      if (use_pcre16) \
523        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
524      else \
525        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
526    
527    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
528      if (use_pcre16) \
529        PCRE_PRINTINT16(re, outfile, debug_lengths); \
530      else \
531        PCRE_PRINTINT8(re, outfile, debug_lengths)
532    
533    #define PCRE_STUDY(extra, re, options, error) \
534      if (use_pcre16) \
535        PCRE_STUDY16(extra, re, options, error); \
536      else \
537        PCRE_STUDY8(extra, re, options, error)
538    
539    /* ----- Only 8-bit mode is supported ----- */
540    
541    #elif defined SUPPORT_PCRE8
542    #define CHAR_SIZE                 1
543    #define PCHARS                    PCHARS8
544    #define PCHARSV                   PCHARSV8
545    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
546    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
547    #define STRLEN                    STRLEN8
548    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
549    #define PCRE_COMPILE              PCRE_COMPILE8
550    #define PCRE_CONFIG               pcre_config
551    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
552    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
553    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
554    #define PCRE_EXEC                 PCRE_EXEC8
555    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
556    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
557    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
558    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
559    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
560    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
561    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
562    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
563    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
564    #define PCRE_MAKETABLES           pcre_maketables()
565    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
566    #define PCRE_PRINTINT             PCRE_PRINTINT8
567    #define PCRE_STUDY                PCRE_STUDY8
568    
569    /* ----- Only 16-bit mode is supported ----- */
570    
571    #else
572    #define CHAR_SIZE                 2
573    #define PCHARS                    PCHARS16
574    #define PCHARSV                   PCHARSV16
575    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
576    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
577    #define STRLEN                    STRLEN16
578    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
579    #define PCRE_COMPILE              PCRE_COMPILE16
580    #define PCRE_CONFIG               pcre16_config
581    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
582    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
583    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
584    #define PCRE_EXEC                 PCRE_EXEC16
585    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
586    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
587    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
588    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
589    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
590    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
591    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
592    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
593    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
594    #define PCRE_MAKETABLES           pcre16_maketables()
595    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
596    #define PCRE_PRINTINT             PCRE_PRINTINT16
597    #define PCRE_STUDY                PCRE_STUDY16
598    #endif
599    
600    /* ----- End of mode-specific function call macros ----- */
601    
602    
603    /* Other parameters */
604    
605  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
606  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 610 
610  #endif  #endif
611  #endif  #endif
612    
613    #if !defined NODFA
614    #define DFA_WS_DIMENSION 1000
615    #endif
616    
617    /* This is the default loop count for timing. */
618    
619    #define LOOPREPEAT 500000
620    
621    /* Static variables */
622    
623  static FILE *outfile;  static FILE *outfile;
624  static int log_store = 0;  static int log_store = 0;
625    static int callout_count;
626    static int callout_extra;
627    static int callout_fail_count;
628    static int callout_fail_id;
629    static int debug_lengths;
630    static int first_callout;
631    static int jit_was_used;
632    static int locale_set = 0;
633    static int show_malloc;
634    static int use_utf;
635    static size_t gotten_store;
636    static size_t first_gotten_store = 0;
637    static const unsigned char *last_callout_mark = NULL;
638    
639    /* The buffers grow automatically if very long input lines are encountered. */
640    
641    static int buffer_size = 50000;
642    static pcre_uint8 *buffer = NULL;
643    static pcre_uint8 *dbuffer = NULL;
644    static pcre_uint8 *pbuffer = NULL;
645    
646    /* Another buffer is needed for translation to 16-bit character strings. It will
647    be obtained and extended as required. */
648    
649    #ifdef SUPPORT_PCRE16
650    static int buffer16_size = 0;
651    static pcre_uint16 *buffer16 = NULL;
652    
653    #ifdef SUPPORT_PCRE8
654    
655    /* We need the table of operator lengths that is used for 16-bit compiling, in
656    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
657    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
658    appropriately for the 16-bit world. Just as a safety check, make sure that
659    COMPILE_PCRE16 is *not* set. */
660    
661    #ifdef COMPILE_PCRE16
662    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
663    #endif
664    
665    #if LINK_SIZE == 2
666    #undef LINK_SIZE
667    #define LINK_SIZE 1
668    #elif LINK_SIZE == 3 || LINK_SIZE == 4
669    #undef LINK_SIZE
670    #define LINK_SIZE 2
671    #else
672    #error LINK_SIZE must be either 2, 3, or 4
673    #endif
674    
675    #undef IMM2_SIZE
676    #define IMM2_SIZE 1
677    
678  /* Debugging function to print the internal form of the regex. This is the same  #endif /* SUPPORT_PCRE8 */
 code as contained in pcre.c under the DEBUG macro. */  
679    
680  static const char *OP_names[] = {  static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
681    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  #endif  /* SUPPORT_PCRE16 */
   "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  
   "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  
   "Brazero", "Braminzero", "Bra"  
 };  
682    
683    /* If we have 8-bit support, default use_pcre16 to 0; if there is also
684    16-bit support, it can be changed by an option. If there is no 8-bit support,
685    there must be 16-bit support, so default it to 1. */
686    
687  static void print_internals(pcre *re)  #ifdef SUPPORT_PCRE8
688  {  static int use_pcre16 = 0;
689  unsigned char *code = ((real_pcre *)re)->code;  #else
690    static int use_pcre16 = 1;
691    #endif
692    
693  printf("------------------------------------------------------------------\n");  /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
694    
695  for(;;)  static int jit_study_bits[] =
696    {    {
697    int c;    PCRE_STUDY_JIT_COMPILE,
698    int charlength;    PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
699      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
700    printf("%3d ", code - ((real_pcre *)re)->code);    PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
701      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
702      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
703      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
704        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
705    };
706    
707    if (*code >= OP_BRA)  /* Textual explanations for runtime error codes */
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
708    
709    else switch(*code)  static const char *errtexts[] = {
710      {    NULL,  /* 0 is no error */
711      case OP_END:    NULL,  /* NOMATCH is handled specially */
712      printf("    %s\n", OP_names[*code]);    "NULL argument passed",
713      printf("------------------------------------------------------------------\n");    "bad option value",
714      return;    "magic number missing",
715      "unknown opcode - pattern overwritten?",
716      "no more memory",
717      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
718      "match limit exceeded",
719      "callout error code",
720      NULL,  /* BADUTF8/16 is handled specially */
721      NULL,  /* BADUTF8/16 offset is handled specially */
722      NULL,  /* PARTIAL is handled specially */
723      "not used - internal error",
724      "internal error - pattern overwritten?",
725      "bad count value",
726      "item unsupported for DFA matching",
727      "backreference condition or recursion test not supported for DFA matching",
728      "match limit not supported for DFA matching",
729      "workspace size exceeded in DFA matching",
730      "too much recursion for DFA matching",
731      "recursion limit exceeded",
732      "not used - internal error",
733      "invalid combination of newline options",
734      "bad offset value",
735      NULL,  /* SHORTUTF8/16 is handled specially */
736      "nested recursion at the same subject position",
737      "JIT stack limit reached",
738      "pattern compiled in wrong mode: 8-bit/16-bit error",
739      "pattern compiled with other endianness",
740      "invalid data in workspace for DFA restart"
741    };
742    
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
743    
744      case OP_KETRMAX:  /*************************************************
745      case OP_KETRMIN:  *         Alternate character tables             *
746      case OP_ALT:  *************************************************/
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
747    
748      case OP_STAR:  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
749      case OP_MINSTAR:  using the default tables of the library. However, the T option can be used to
750      case OP_PLUS:  select alternate sets of tables, for different kinds of testing. Note also that
751      case OP_MINPLUS:  the L (locale) option also adjusts the tables. */
752      case OP_QUERY:  
753      case OP_MINQUERY:  /* This is the set of tables distributed as default with PCRE. It recognizes
754      case OP_TYPESTAR:  only ASCII characters. */
755      case OP_TYPEMINSTAR:  
756      case OP_TYPEPLUS:  static const pcre_uint8 tables0[] = {
757      case OP_TYPEMINPLUS:  
758      case OP_TYPEQUERY:  /* This table is a lower casing table. */
759      case OP_TYPEMINQUERY:  
760      if (*code >= OP_TYPESTAR)      0,  1,  2,  3,  4,  5,  6,  7,
761        printf("    %s", OP_names[code[1]]);      8,  9, 10, 11, 12, 13, 14, 15,
762      else if (isprint(c = code[1])) printf("    %c", c);     16, 17, 18, 19, 20, 21, 22, 23,
763        else printf("    \\x%02x", c);     24, 25, 26, 27, 28, 29, 30, 31,
764      printf("%s", OP_names[*code++]);     32, 33, 34, 35, 36, 37, 38, 39,
765      break;     40, 41, 42, 43, 44, 45, 46, 47,
766       48, 49, 50, 51, 52, 53, 54, 55,
767       56, 57, 58, 59, 60, 61, 62, 63,
768       64, 97, 98, 99,100,101,102,103,
769      104,105,106,107,108,109,110,111,
770      112,113,114,115,116,117,118,119,
771      120,121,122, 91, 92, 93, 94, 95,
772       96, 97, 98, 99,100,101,102,103,
773      104,105,106,107,108,109,110,111,
774      112,113,114,115,116,117,118,119,
775      120,121,122,123,124,125,126,127,
776      128,129,130,131,132,133,134,135,
777      136,137,138,139,140,141,142,143,
778      144,145,146,147,148,149,150,151,
779      152,153,154,155,156,157,158,159,
780      160,161,162,163,164,165,166,167,
781      168,169,170,171,172,173,174,175,
782      176,177,178,179,180,181,182,183,
783      184,185,186,187,188,189,190,191,
784      192,193,194,195,196,197,198,199,
785      200,201,202,203,204,205,206,207,
786      208,209,210,211,212,213,214,215,
787      216,217,218,219,220,221,222,223,
788      224,225,226,227,228,229,230,231,
789      232,233,234,235,236,237,238,239,
790      240,241,242,243,244,245,246,247,
791      248,249,250,251,252,253,254,255,
792    
793    /* This table is a case flipping table. */
794    
795        0,  1,  2,  3,  4,  5,  6,  7,
796        8,  9, 10, 11, 12, 13, 14, 15,
797       16, 17, 18, 19, 20, 21, 22, 23,
798       24, 25, 26, 27, 28, 29, 30, 31,
799       32, 33, 34, 35, 36, 37, 38, 39,
800       40, 41, 42, 43, 44, 45, 46, 47,
801       48, 49, 50, 51, 52, 53, 54, 55,
802       56, 57, 58, 59, 60, 61, 62, 63,
803       64, 97, 98, 99,100,101,102,103,
804      104,105,106,107,108,109,110,111,
805      112,113,114,115,116,117,118,119,
806      120,121,122, 91, 92, 93, 94, 95,
807       96, 65, 66, 67, 68, 69, 70, 71,
808       72, 73, 74, 75, 76, 77, 78, 79,
809       80, 81, 82, 83, 84, 85, 86, 87,
810       88, 89, 90,123,124,125,126,127,
811      128,129,130,131,132,133,134,135,
812      136,137,138,139,140,141,142,143,
813      144,145,146,147,148,149,150,151,
814      152,153,154,155,156,157,158,159,
815      160,161,162,163,164,165,166,167,
816      168,169,170,171,172,173,174,175,
817      176,177,178,179,180,181,182,183,
818      184,185,186,187,188,189,190,191,
819      192,193,194,195,196,197,198,199,
820      200,201,202,203,204,205,206,207,
821      208,209,210,211,212,213,214,215,
822      216,217,218,219,220,221,222,223,
823      224,225,226,227,228,229,230,231,
824      232,233,234,235,236,237,238,239,
825      240,241,242,243,244,245,246,247,
826      248,249,250,251,252,253,254,255,
827    
828    /* This table contains bit maps for various character classes. Each map is 32
829    bytes long and the bits run from the least significant end of each byte. The
830    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
831    graph, print, punct, and cntrl. Other classes are built from combinations. */
832    
833      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
834      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
835      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
836      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
837    
838      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
839      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
840      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
841      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
842    
843      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
844      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
845      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
846      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
847    
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
850      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
851      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
852    
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
855      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
856      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
857    
858      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
859      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
860      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
861      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
862    
863      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
864      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
865      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
866      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
867    
868      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
869      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
870      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
871      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
872    
873      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
874      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
875      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
876      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
877    
878      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
879      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
880      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
881      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
882    
883    /* This table identifies various classes of character by individual bits:
884      0x01   white space character
885      0x02   letter
886      0x04   decimal digit
887      0x08   hexadecimal digit
888      0x10   alphanumeric or '_'
889      0x80   regular expression metacharacter or binary zero
890    */
891    
892      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
893      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
894      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
895      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
896      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
897      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
898      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
899      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
900      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
901      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
902      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
903      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
904      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
905      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
906      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
907      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
908      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
909      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
910      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
911      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
912      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
913      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
914      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
915      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
916      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
917      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
918      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
919      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
920      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
921      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
922      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
923      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
924    
925    /* This is a set of tables that came originally from a Windows user. It seems to
926    be at least an approximation of ISO 8859. In particular, there are characters
927    greater than 128 that are marked as spaces, letters, etc. */
928    
929    static const pcre_uint8 tables1[] = {
930    0,1,2,3,4,5,6,7,
931    8,9,10,11,12,13,14,15,
932    16,17,18,19,20,21,22,23,
933    24,25,26,27,28,29,30,31,
934    32,33,34,35,36,37,38,39,
935    40,41,42,43,44,45,46,47,
936    48,49,50,51,52,53,54,55,
937    56,57,58,59,60,61,62,63,
938    64,97,98,99,100,101,102,103,
939    104,105,106,107,108,109,110,111,
940    112,113,114,115,116,117,118,119,
941    120,121,122,91,92,93,94,95,
942    96,97,98,99,100,101,102,103,
943    104,105,106,107,108,109,110,111,
944    112,113,114,115,116,117,118,119,
945    120,121,122,123,124,125,126,127,
946    128,129,130,131,132,133,134,135,
947    136,137,138,139,140,141,142,143,
948    144,145,146,147,148,149,150,151,
949    152,153,154,155,156,157,158,159,
950    160,161,162,163,164,165,166,167,
951    168,169,170,171,172,173,174,175,
952    176,177,178,179,180,181,182,183,
953    184,185,186,187,188,189,190,191,
954    224,225,226,227,228,229,230,231,
955    232,233,234,235,236,237,238,239,
956    240,241,242,243,244,245,246,215,
957    248,249,250,251,252,253,254,223,
958    224,225,226,227,228,229,230,231,
959    232,233,234,235,236,237,238,239,
960    240,241,242,243,244,245,246,247,
961    248,249,250,251,252,253,254,255,
962    0,1,2,3,4,5,6,7,
963    8,9,10,11,12,13,14,15,
964    16,17,18,19,20,21,22,23,
965    24,25,26,27,28,29,30,31,
966    32,33,34,35,36,37,38,39,
967    40,41,42,43,44,45,46,47,
968    48,49,50,51,52,53,54,55,
969    56,57,58,59,60,61,62,63,
970    64,97,98,99,100,101,102,103,
971    104,105,106,107,108,109,110,111,
972    112,113,114,115,116,117,118,119,
973    120,121,122,91,92,93,94,95,
974    96,65,66,67,68,69,70,71,
975    72,73,74,75,76,77,78,79,
976    80,81,82,83,84,85,86,87,
977    88,89,90,123,124,125,126,127,
978    128,129,130,131,132,133,134,135,
979    136,137,138,139,140,141,142,143,
980    144,145,146,147,148,149,150,151,
981    152,153,154,155,156,157,158,159,
982    160,161,162,163,164,165,166,167,
983    168,169,170,171,172,173,174,175,
984    176,177,178,179,180,181,182,183,
985    184,185,186,187,188,189,190,191,
986    224,225,226,227,228,229,230,231,
987    232,233,234,235,236,237,238,239,
988    240,241,242,243,244,245,246,215,
989    248,249,250,251,252,253,254,223,
990    192,193,194,195,196,197,198,199,
991    200,201,202,203,204,205,206,207,
992    208,209,210,211,212,213,214,247,
993    216,217,218,219,220,221,222,255,
994    0,62,0,0,1,0,0,0,
995    0,0,0,0,0,0,0,0,
996    32,0,0,0,1,0,0,0,
997    0,0,0,0,0,0,0,0,
998    0,0,0,0,0,0,255,3,
999    126,0,0,0,126,0,0,0,
1000    0,0,0,0,0,0,0,0,
1001    0,0,0,0,0,0,0,0,
1002    0,0,0,0,0,0,255,3,
1003    0,0,0,0,0,0,0,0,
1004    0,0,0,0,0,0,12,2,
1005    0,0,0,0,0,0,0,0,
1006    0,0,0,0,0,0,0,0,
1007    254,255,255,7,0,0,0,0,
1008    0,0,0,0,0,0,0,0,
1009    255,255,127,127,0,0,0,0,
1010    0,0,0,0,0,0,0,0,
1011    0,0,0,0,254,255,255,7,
1012    0,0,0,0,0,4,32,4,
1013    0,0,0,128,255,255,127,255,
1014    0,0,0,0,0,0,255,3,
1015    254,255,255,135,254,255,255,7,
1016    0,0,0,0,0,4,44,6,
1017    255,255,127,255,255,255,127,255,
1018    0,0,0,0,254,255,255,255,
1019    255,255,255,255,255,255,255,127,
1020    0,0,0,0,254,255,255,255,
1021    255,255,255,255,255,255,255,255,
1022    0,2,0,0,255,255,255,255,
1023    255,255,255,255,255,255,255,127,
1024    0,0,0,0,255,255,255,255,
1025    255,255,255,255,255,255,255,255,
1026    0,0,0,0,254,255,0,252,
1027    1,0,0,248,1,0,0,120,
1028    0,0,0,0,254,255,255,255,
1029    0,0,128,0,0,0,128,0,
1030    255,255,255,255,0,0,0,0,
1031    0,0,0,0,0,0,0,128,
1032    255,255,255,255,0,0,0,0,
1033    0,0,0,0,0,0,0,0,
1034    128,0,0,0,0,0,0,0,
1035    0,1,1,0,1,1,0,0,
1036    0,0,0,0,0,0,0,0,
1037    0,0,0,0,0,0,0,0,
1038    1,0,0,0,128,0,0,0,
1039    128,128,128,128,0,0,128,0,
1040    28,28,28,28,28,28,28,28,
1041    28,28,0,0,0,0,0,128,
1042    0,26,26,26,26,26,26,18,
1043    18,18,18,18,18,18,18,18,
1044    18,18,18,18,18,18,18,18,
1045    18,18,18,128,128,0,128,16,
1046    0,26,26,26,26,26,26,18,
1047    18,18,18,18,18,18,18,18,
1048    18,18,18,18,18,18,18,18,
1049    18,18,18,128,128,0,0,0,
1050    0,0,0,0,0,1,0,0,
1051    0,0,0,0,0,0,0,0,
1052    0,0,0,0,0,0,0,0,
1053    0,0,0,0,0,0,0,0,
1054    1,0,0,0,0,0,0,0,
1055    0,0,18,0,0,0,0,0,
1056    0,0,20,20,0,18,0,0,
1057    0,20,18,0,0,0,0,0,
1058    18,18,18,18,18,18,18,18,
1059    18,18,18,18,18,18,18,18,
1060    18,18,18,18,18,18,18,0,
1061    18,18,18,18,18,18,18,18,
1062    18,18,18,18,18,18,18,18,
1063    18,18,18,18,18,18,18,18,
1064    18,18,18,18,18,18,18,0,
1065    18,18,18,18,18,18,18,18
1066    };
1067    
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
1068    
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
1069    
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
1070    
1071      case OP_NOTSTAR:  #ifndef HAVE_STRERROR
1072      case OP_NOTMINSTAR:  /*************************************************
1073      case OP_NOTPLUS:  *     Provide strerror() for non-ANSI libraries  *
1074      case OP_NOTMINPLUS:  *************************************************/
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
1075    
1076      case OP_NOTEXACT:  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
1077      case OP_NOTUPTO:  in their libraries, but can provide the same facility by this simple
1078      case OP_NOTMINUPTO:  alternative function. */
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
1079    
1080      case OP_REF:  extern int   sys_nerr;
1081      printf("    \\%d", *(++code));  extern char *sys_errlist[];
     code++;  
     goto CLASS_REF_REPEAT;  
1082    
1083      case OP_CLASS:  char *
1084        {  strerror(int n)
1085        int i, min, max;  {
1086    if (n < 0 || n >= sys_nerr) return "unknown error number";
1087    return sys_errlist[n];
1088    }
1089    #endif /* HAVE_STRERROR */
1090    
       code++;  
       printf("    [");  
1091    
1092        for (i = 0; i < 256; i++)  /*************************************************
1093          {  *         JIT memory callback                    *
1094          if ((code[i/8] & (1 << (i&7))) != 0)  *************************************************/
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
1095    
1096        CLASS_REF_REPEAT:  static pcre_jit_stack* jit_callback(void *arg)
1097    {
1098    jit_was_used = TRUE;
1099    return (pcre_jit_stack *)arg;
1100    }
1101    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
1102    
1103          case OP_CRRANGE:  #if !defined NOUTF || defined SUPPORT_PCRE16
1104          case OP_CRMINRANGE:  /*************************************************
1105          min = (code[1] << 8) + code[2];  *            Convert UTF-8 string to value       *
1106          max = (code[3] << 8) + code[4];  *************************************************/
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
1107    
1108          default:  /* This function takes one or more bytes that represents a UTF-8 character,
1109          code--;  and returns the value of the character.
         }  
       }  
     break;  
1110    
1111      /* Anything else is just a one-node item */  Argument:
1112      utf8bytes   a pointer to the byte vector
1113      vptr        a pointer to an int to receive the value
1114    
1115    Returns:      >  0 => the number of bytes consumed
1116                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1117    */
1118    
1119      default:  static int
1120      printf("    %s", OP_names[*code]);  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1121      break;  {
1122      }  int c = *utf8bytes++;
1123    int d = c;
1124    int i, j, s;
1125    
1126    code++;  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1127    printf("\n");    {
1128      if ((d & 0x80) == 0) break;
1129      d <<= 1;
1130    }    }
 }  
1131    
1132    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1133    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1134    
1135    /* i now has a value in the range 1-5 */
1136    
1137  /* Character string printing function. */  s = 6*i;
1138    d = (c & utf8_table3[i]) << s;
1139    
1140  static void pchars(unsigned char *p, int length)  for (j = 0; j < i; j++)
1141  {    {
1142  int c;    c = *utf8bytes++;
1143  while (length-- > 0)    if ((c & 0xc0) != 0x80) return -(j+1);
1144    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    s -= 6;
1145      else fprintf(outfile, "\\x%02x", c);    d |= (c & 0x3f) << s;
1146  }    }
1147    
1148    /* Check that encoding was the correct unique one */
1149    
1150    for (j = 0; j < utf8_table1_size; j++)
1151      if (d <= utf8_table1[j]) break;
1152    if (j != i) return -(i+1);
1153    
1154  /* Alternative malloc function, to test functionality and show the size of the  /* Valid value */
 compiled re. */  
1155    
1156  static void *new_malloc(size_t size)  *vptr = d;
1157  {  return i+1;
 if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  
 return malloc(size);  
1158  }  }
1159    #endif /* NOUTF || SUPPORT_PCRE16 */
1160    
1161    
1162    
1163  /* Read lines from named file or stdin and write to named file or stdout; lines  #if !defined NOUTF || defined SUPPORT_PCRE16
1164  consist of a regular expression, in delimiters and optionally followed by  /*************************************************
1165  options, followed by a set of test data, terminated by an empty line. */  *       Convert character value to UTF-8         *
1166    *************************************************/
1167    
1168  int main(int argc, char **argv)  /* This function takes an integer value in the range 0 - 0x7fffffff
1169    and encodes it as a UTF-8 character in 1 to 6 bytes.
1170    
1171    Arguments:
1172      cvalue     the character value
1173      utf8bytes  pointer to buffer for result - at least 6 bytes long
1174    
1175    Returns:     number of bytes placed in the buffer
1176    */
1177    
1178    static int
1179    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1180  {  {
1181  FILE *infile = stdin;  register int i, j;
1182  int options = 0;  for (i = 0; i < utf8_table1_size; i++)
1183  int study_options = 0;    if (cvalue <= utf8_table1[i]) break;
1184  int op = 1;  utf8bytes += i;
1185  int timeit = 0;  for (j = i; j > 0; j--)
1186  int showinfo = 0;   {
1187  int posix = 0;   *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1188  int debug = 0;   cvalue >>= 6;
1189  unsigned char buffer[30000];   }
1190  unsigned char dbuffer[1024];  *utf8bytes = utf8_table2[i] | cvalue;
1191    return i + 1;
1192    }
1193    #endif
1194    
 /* Static so that new_malloc can use it. */  
1195    
1196  outfile = stdout;  #ifdef SUPPORT_PCRE16
1197    /*************************************************
1198    *         Convert a string to 16-bit             *
1199    *************************************************/
1200    
1201  /* Scan options */  /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1202    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1203    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1204    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1205    result is always left in buffer16.
1206    
1207    Note that this function does not object to surrogate values. This is
1208    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1209    for the purpose of testing that they are correctly faulted.
1210    
1211    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1212    in UTF-8 so that values greater than 255 can be handled.
1213    
1214    Arguments:
1215      data       TRUE if converting a data line; FALSE for a regex
1216      p          points to a byte string
1217      utf        true if UTF-8 (to be converted to UTF-16)
1218      len        number of bytes in the string (excluding trailing zero)
1219    
1220    Returns:     number of 16-bit data items used (excluding trailing zero)
1221                 OR -1 if a UTF-8 string is malformed
1222                 OR -2 if a value > 0x10ffff is encountered
1223                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1224    */
1225    
1226  while (argc > 1 && argv[op][0] == '-')  static int
1227    to16(int data, pcre_uint8 *p, int utf, int len)
1228    {
1229    pcre_uint16 *pp;
1230    
1231    if (buffer16_size < 2*len + 2)
1232    {    {
1233    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (buffer16 != NULL) free(buffer16);
1234    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    buffer16_size = 2*len + 2;
1235    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1236    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    if (buffer16 == NULL)
   else if (strcmp(argv[op], "-p") == 0) posix = 1;  
   else  
1237      {      {
1238      printf("*** Unknown option %s\n", argv[op]);      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1239      return 1;      exit(1);
1240      }      }
   op++;  
   argc--;  
1241    }    }
1242    
1243  /* Sort out the input and output files */  pp = buffer16;
1244    
1245  if (argc > 1)  if (!utf && !data)
1246    {    {
1247    infile = fopen(argv[op], "r");    while (len-- > 0) *pp++ = *p++;
   if (infile == NULL)  
     {  
     printf("** Failed to open %s\n", argv[op]);  
     return 1;  
     }  
1248    }    }
1249    
1250  if (argc > 2)  else
1251    {    {
1252    outfile = fopen(argv[op+1], "w");    int c = 0;
1253    if (outfile == NULL)    while (len > 0)
1254      {      {
1255      printf("** Failed to open %s\n", argv[op+1]);      int chlen = utf82ord(p, &c);
1256      return 1;      if (chlen <= 0) return -1;
1257        if (c > 0x10ffff) return -2;
1258        p += chlen;
1259        len -= chlen;
1260        if (c < 0x10000) *pp++ = c; else
1261          {
1262          if (!utf) return -3;
1263          c -= 0x10000;
1264          *pp++ = 0xD800 | (c >> 10);
1265          *pp++ = 0xDC00 | (c & 0x3ff);
1266          }
1267      }      }
1268    }    }
1269    
1270  /* Set alternative malloc function */  *pp = 0;
1271    return pp - buffer16;
1272    }
1273    #endif
1274    
 pcre_malloc = new_malloc;  
1275    
1276  /* Heading line, then prompt for first re if stdin */  /*************************************************
1277    *        Read or extend an input line            *
1278    *************************************************/
1279    
1280  fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  /* Input lines are read into buffer, but both patterns and data lines can be
1281  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  continued over multiple input lines. In addition, if the buffer fills up, we
1282    want to automatically expand it so as to be able to handle extremely large
1283    lines that are needed for certain stress tests. When the input buffer is
1284    expanded, the other two buffers must also be expanded likewise, and the
1285    contents of pbuffer, which are a copy of the input for callouts, must be
1286    preserved (for when expansion happens for a data line). This is not the most
1287    optimal way of handling this, but hey, this is just a test program!
1288    
1289    Arguments:
1290      f            the file to read
1291      start        where in buffer to start (this *must* be within buffer)
1292      prompt       for stdin or readline()
1293    
1294    Returns:       pointer to the start of new data
1295                   could be a copy of start, or could be moved
1296                   NULL if no data read and EOF reached
1297    */
1298    
1299  /* Main loop */  static pcre_uint8 *
1300    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1301    {
1302    pcre_uint8 *here = start;
1303    
1304  for (;;)  for (;;)
1305    {    {
1306    pcre *re = NULL;    size_t rlen = (size_t)(buffer_size - (here - buffer));
   pcre_extra *extra = NULL;  
   regex_t preg;  
   const char *error;  
   unsigned char *p, *pp;  
   int do_study = 0;  
   int do_debug = 0;  
   int do_posix = 0;  
   int erroroffset, len, delimiter;  
1307    
1308    if (infile == stdin) printf("  re> ");    if (rlen > 1000)
1309    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;      {
1310    if (infile != stdin) fprintf(outfile, (char *)buffer);      int dlen;
1311    
1312    p = buffer;      /* If libreadline or libedit support is required, use readline() to read a
1313    while (isspace(*p)) p++;      line if the input is a terminal. Note that readline() removes the trailing
1314    if (*p == 0) continue;      newline, so we must put it back again, to be compatible with fgets(). */
1315    
1316    /* Get the delimiter and seek the end of the pattern; if it isn't  #if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
1317    complete, read more. */      if (isatty(fileno(f)))
1318          {
1319          size_t len;
1320          char *s = readline(prompt);
1321          if (s == NULL) return (here == start)? NULL : start;
1322          len = strlen(s);
1323          if (len > 0) add_history(s);
1324          if (len > rlen - 1) len = rlen - 1;
1325          memcpy(here, s, len);
1326          here[len] = '\n';
1327          here[len+1] = 0;
1328          free(s);
1329          }
1330        else
1331    #endif
1332    
1333    delimiter = *p++;      /* Read the next line by normal means, prompting if the file is stdin. */
1334    
1335    if (isalnum(delimiter))        {
1336      {        if (f == stdin) printf("%s", prompt);
1337      fprintf(outfile, "** Delimiter must not be alphameric\n");        if (fgets((char *)here, rlen,  f) == NULL)
1338      goto SKIP_DATA;          return (here == start)? NULL : start;
1339      }        }
1340    
1341    pp = p;      dlen = (int)strlen((char *)here);
1342        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1343        here += dlen;
1344        }
1345    
1346    for(;;)    else
1347      {      {
1348      while (*pp != 0 && *pp != delimiter) pp++;      int new_buffer_size = 2*buffer_size;
1349      if (*pp != 0) break;      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1350        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1351        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1352    
1353      len = sizeof(buffer) - (pp - buffer);      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
     if (len < 256)  
1354        {        {
1355        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1356        goto SKIP_DATA;        exit(1);
1357        }        }
1358    
1359      if (infile == stdin) printf("    > ");      memcpy(new_buffer, buffer, buffer_size);
1360      if (fgets((char *)pp, len, infile) == NULL)      memcpy(new_pbuffer, pbuffer, buffer_size);
       {  
       fprintf(outfile, "** Unexpected EOF\n");  
       goto END_OFF;  
       }  
     if (infile != stdin) fprintf(outfile, (char *)pp);  
     }  
1361    
1362    /* Terminate the pattern at the delimiter */      buffer_size = new_buffer_size;
1363    
1364    *pp++ = 0;      start = new_buffer + (start - buffer);
1365        here = new_buffer + (here - buffer);
1366    
1367    /* Look for options after final delimiter */      free(buffer);
1368        free(dbuffer);
1369        free(pbuffer);
1370    
1371    options = 0;      buffer = new_buffer;
1372    study_options = 0;      dbuffer = new_dbuffer;
1373    while (*pp != 0)      pbuffer = new_pbuffer;
     {  
     switch (*pp++)  
       {  
       case 'i': options |= PCRE_CASELESS; break;  
       case 'm': options |= PCRE_MULTILINE; break;  
       case 's': options |= PCRE_DOTALL; break;  
       case 'x': options |= PCRE_EXTENDED; break;  
       case 'A': options |= PCRE_ANCHORED; break;  
       case 'D': do_debug = 1; break;  
       case 'E': options |= PCRE_DOLLAR_ENDONLY; break;  
       case 'P': do_posix = 1; break;  
       case 'S': do_study = 1; break;  
       case 'I': study_options |= PCRE_CASELESS; break;  
       case 'X': options |= PCRE_EXTRA; break;  
       case '\n': case ' ': break;  
       default:  
       fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);  
       goto SKIP_DATA;  
       }  
1374      }      }
1375      }
1376    
1377    /* Handle compiing via the POSIX interface, which doesn't support the  return NULL;  /* Control never gets here */
1378    timing, showing, or debugging options. */  }
   
   if (posix || do_posix)  
     {  
     int rc;  
     int cflags = 0;  
     if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;  
     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;  
     rc = regcomp(&preg, (char *)p, cflags);  
1379    
     /* Compilation failed; go back for another re, skipping to blank line  
     if non-interactive. */  
1380    
     if (rc != 0)  
       {  
       (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));  
       fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);  
       goto SKIP_DATA;  
       }  
     }  
1381    
1382    /* Handle compiling via the native interface */  /*************************************************
1383    *          Read number from string               *
1384    *************************************************/
1385    
1386    else  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1387      {  around with conditional compilation, just do the job by hand. It is only used
1388      if (timeit)  for unpicking arguments, so just keep it simple.
1389        {  
1390        register int i;  Arguments:
1391        clock_t time_taken;    str           string to be converted
1392        clock_t start_time = clock();    endptr        where to put the end pointer
       for (i = 0; i < 4000; i++)  
         {  
         re = pcre_compile((char *)p, options, &error, &erroroffset);  
         if (re != NULL) free(re);  
         }  
       time_taken = clock() - start_time;  
       fprintf(outfile, "Compile time %.2f milliseconds\n",  
         ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
       }  
1393    
1394      re = pcre_compile((char *)p, options, &error, &erroroffset);  Returns:        the unsigned long
1395    */
1396    
1397    static int
1398    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1399    {
1400    int result = 0;
1401    while(*str != 0 && isspace(*str)) str++;
1402    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1403    *endptr = str;
1404    return(result);
1405    }
1406    
1407    
1408    
1409    /*************************************************
1410    *             Print one character                *
1411    *************************************************/
1412    
1413    /* Print a single character either literally, or as a hex escape. */
1414    
1415    static int pchar(int c, FILE *f)
1416    {
1417    if (PRINTOK(c))
1418      {
1419      if (f != NULL) fprintf(f, "%c", c);
1420      return 1;
1421      }
1422    
1423    if (c < 0x100)
1424      {
1425      if (use_utf)
1426        {
1427        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1428        return 6;
1429        }
1430      else
1431        {
1432        if (f != NULL) fprintf(f, "\\x%02x", c);
1433        return 4;
1434        }
1435      }
1436    
1437    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1438    return (c <= 0x000000ff)? 6 :
1439           (c <= 0x00000fff)? 7 :
1440           (c <= 0x0000ffff)? 8 :
1441           (c <= 0x000fffff)? 9 : 10;
1442    }
1443    
1444    
1445    
1446    #ifdef SUPPORT_PCRE8
1447    /*************************************************
1448    *         Print 8-bit character string           *
1449    *************************************************/
1450    
1451    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1452    If handed a NULL file, just counts chars without printing. */
1453    
1454    static int pchars(pcre_uint8 *p, int length, FILE *f)
1455    {
1456    int c = 0;
1457    int yield = 0;
1458    
1459    if (length < 0)
1460      length = strlen((char *)p);
1461    
1462    while (length-- > 0)
1463      {
1464    #if !defined NOUTF
1465      if (use_utf)
1466        {
1467        int rc = utf82ord(p, &c);
1468        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1469          {
1470          length -= rc - 1;
1471          p += rc;
1472          yield += pchar(c, f);
1473          continue;
1474          }
1475        }
1476    #endif
1477      c = *p++;
1478      yield += pchar(c, f);
1479      }
1480    
1481    return yield;
1482    }
1483    #endif
1484    
1485    
1486    
1487    #ifdef SUPPORT_PCRE16
1488    /*************************************************
1489    *    Find length of 0-terminated 16-bit string   *
1490    *************************************************/
1491    
1492    static int strlen16(PCRE_SPTR16 p)
1493    {
1494    int len = 0;
1495    while (*p++ != 0) len++;
1496    return len;
1497    }
1498    #endif  /* SUPPORT_PCRE16 */
1499    
1500    
1501    #ifdef SUPPORT_PCRE16
1502    /*************************************************
1503    *           Print 16-bit character string        *
1504    *************************************************/
1505    
1506    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1507    If handed a NULL file, just counts chars without printing. */
1508    
1509    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1510    {
1511    int yield = 0;
1512    
1513    if (length < 0)
1514      length = strlen16(p);
1515    
1516    while (length-- > 0)
1517      {
1518      int c = *p++ & 0xffff;
1519    #if !defined NOUTF
1520      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1521        {
1522        int d = *p & 0xffff;
1523        if (d >= 0xDC00 && d < 0xDFFF)
1524          {
1525          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1526          length--;
1527          p++;
1528          }
1529        }
1530    #endif
1531      yield += pchar(c, f);
1532      }
1533    
1534    return yield;
1535    }
1536    #endif  /* SUPPORT_PCRE16 */
1537    
1538    
1539    
1540    #ifdef SUPPORT_PCRE8
1541    /*************************************************
1542    *     Read a capture name (8-bit) and check it   *
1543    *************************************************/
1544    
1545    static pcre_uint8 *
1546    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1547    {
1548    pcre_uint8 *npp = *pp;
1549    while (isalnum(*p)) *npp++ = *p++;
1550    *npp++ = 0;
1551    *npp = 0;
1552    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1553      {
1554      fprintf(outfile, "no parentheses with name \"");
1555      PCHARSV(*pp, 0, -1, outfile);
1556      fprintf(outfile, "\"\n");
1557      }
1558    
1559    *pp = npp;
1560    return p;
1561    }
1562    #endif  /* SUPPORT_PCRE8 */
1563    
1564    
1565    
1566    #ifdef SUPPORT_PCRE16
1567    /*************************************************
1568    *     Read a capture name (16-bit) and check it  *
1569    *************************************************/
1570    
1571    /* Note that the text being read is 8-bit. */
1572    
1573    static pcre_uint8 *
1574    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1575    {
1576    pcre_uint16 *npp = *pp;
1577    while (isalnum(*p)) *npp++ = *p++;
1578    *npp++ = 0;
1579    *npp = 0;
1580    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1581      {
1582      fprintf(outfile, "no parentheses with name \"");
1583      PCHARSV(*pp, 0, -1, outfile);
1584      fprintf(outfile, "\"\n");
1585      }
1586    *pp = npp;
1587    return p;
1588    }
1589    #endif  /* SUPPORT_PCRE16 */
1590    
1591    
1592    
1593    /*************************************************
1594    *              Callout function                  *
1595    *************************************************/
1596    
1597    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1598    the match. Yield zero unless more callouts than the fail count, or the callout
1599    data is not zero. */
1600    
1601    static int callout(pcre_callout_block *cb)
1602    {
        /* f is where the subject echo and (optional) capture dump go: outfile
        on the first callout of a match or whenever callout_extra is set,
        otherwise NULL. The position markers and the pattern item are always
        written to outfile, whatever f is. */
1603    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1604    int i, pre_start, post_start, subject_length;
1605
        /* With callout_extra set, f is never NULL here (see its initializer),
        so the unguarded fprintf(f, ...) calls in this block are safe. Each
        capture occupies a pair of offset_vector entries; a negative first
        entry is reported as unset. */
1606    if (callout_extra)
1607      {
1608      fprintf(f, "Callout %d: last capture = %d\n",
1609        cb->callout_number, cb->capture_last);
1610
1611      for (i = 0; i < cb->capture_top * 2; i += 2)
1612        {
1613        if (cb->offset_vector[i] < 0)
1614          fprintf(f, "%2d: <unset>\n", i/2);
1615        else
1616          {
1617          fprintf(f, "%2d: ", i/2);
1618          PCHARSV(cb->subject, cb->offset_vector[i],
1619            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1620          fprintf(f, "\n");
1621          }
1622        }
1623      }
1624
1625    /* Re-print the subject in canonical form, the first time or if giving full
1626    details. On subsequent calls in the same match, we use pchars just to find the
1627    printed lengths of the substrings. */
1628
1629    if (f != NULL) fprintf(f, "--->");
1630
        /* pre_start: printed width of the subject before the match start;
        post_start: printed width from the match start to the current
        position. Both are needed below to place the '^' markers. (PCHARS and
        PCHARSV are macros defined elsewhere in this file - presumably they
        select the 8-bit or 16-bit print routine; confirm there.) */
1631    PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1632    PCHARS(post_start, cb->subject, cb->start_match,
1633      cb->current_position - cb->start_match, f);
1634
        /* Width of the whole subject, measured only (NULL file). */
1635    PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1636
        /* The remainder of the subject after the current position. */
1637    PCHARSV(cb->subject, cb->current_position,
1638      cb->subject_length - cb->current_position, f);
1639
1640    if (f != NULL) fprintf(f, "\n");
1641
1642    /* Always print appropriate indicators, with callout number if not already
1643    shown. For automatic callouts, show the pattern offset. */
1644
1645    if (cb->callout_number == 255)
1646      {
1647      fprintf(outfile, "%+3d ", cb->pattern_position);
          /* An offset above 99 is wider than the prefix column, so the markers
          are moved to a fresh line indented by four spaces (presumably to keep
          them aligned under the subject). */
1648      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1649      }
1650    else
1651      {
1652      if (callout_extra) fprintf(outfile, "    ");
1653        else fprintf(outfile, "%3d ", cb->callout_number);
1654      }
1655
        /* First '^' marks the match start position. */
1656    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1657    fprintf(outfile, "^");
1658
        /* Second '^' marks the current position; it is omitted when that
        coincides with the match start. One column is already taken by the
        first '^', hence post_start - 1 spaces. */
1659    if (post_start > 0)
1660      {
1661      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1662      fprintf(outfile, "^");
1663      }
1664
        /* Pad out to four columns beyond the end of the subject. */
1665    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1666      fprintf(outfile, " ");
1667
        /* Show the next pattern item, taken from the pattern text in pbuffer;
        a next_item_length of zero is shown as a single character. */
1668    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1669      pbuffer + cb->pattern_position);
1670
1671    fprintf(outfile, "\n");
1672    first_callout = 0;
1673
        /* Report the mark only when its pointer differs from the one seen at
        the previous callout (pointer comparison, not string comparison). */
1674    if (cb->mark != last_callout_mark)
1675      {
1676      if (cb->mark == NULL)
1677        fprintf(outfile, "Latest Mark: <unset>\n");
1678      else
1679        {
1680        fprintf(outfile, "Latest Mark: ");
1681        PCHARSV(cb->mark, 0, -1, outfile);
1682        putc('\n', outfile);
1683        }
1684      last_callout_mark = cb->mark;
1685      }
1686
        /* Non-zero callout data (read as an int) is printed and becomes the
        return value, taking precedence over the fail-count logic below. */
1687    if (cb->callout_data != NULL)
1688      {
1689      int callout_data = *((int *)(cb->callout_data));
1690      if (callout_data != 0)
1691        {
1692        fprintf(outfile, "Callout data = %d\n", callout_data);
1693        return callout_data;
1694        }
1695      }
1696
        /* Otherwise yield 1 only for the callout numbered callout_fail_id, and
        only once callout_count (incremented here, for that callout only) has
        reached callout_fail_count; in all other cases yield 0. */
1697    return (cb->callout_number != callout_fail_id)? 0 :
1698           (++callout_count >= callout_fail_count)? 1 : 0;
1699    }
1700    
1701    
1702    /*************************************************
1703    *            Local malloc functions              *
1704    *************************************************/
1705    
1706    /* Alternative malloc function, to test functionality and save the size of a
1707    compiled re, which is the first store request that pcre_compile() makes. The
1708    show_malloc variable is set only during matching. */
1709    
1710    static void *new_malloc(size_t size)
1711    {
1712    void *block = malloc(size);
1713    gotten_store = size;
1714    if (first_gotten_store == 0) first_gotten_store = size;
1715    if (show_malloc)
1716      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1717    return block;
1718    }
1719    
1720    static void new_free(void *block)
1721    {
1722    if (show_malloc)
1723      fprintf(outfile, "free             %p\n", block);
1724    free(block);
1725    }
1726    
1727    /* For recursion malloc/free, to test stacking calls */
1728    
1729    static void *stack_malloc(size_t size)
1730    {
1731    void *block = malloc(size);
1732    if (show_malloc)
1733      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1734    return block;
1735    }
1736    
1737    static void stack_free(void *block)
1738    {
1739    if (show_malloc)
1740      fprintf(outfile, "stack_free       %p\n", block);
1741    free(block);
1742    }
1743    
1744    
1745    /*************************************************
1746    *          Call pcre_fullinfo()                  *
1747    *************************************************/
1748    
1749    /* Get one piece of information from the pcre_fullinfo() function. When only
1750    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1751    value, but the code is defensive.
1752    
1753    Arguments:
1754      re        compiled regex
1755      study     study data
1756      option    PCRE_INFO_xxx option
1757      ptr       where to put the data
1758    
1759    Returns:    0 when OK, < 0 on error
1760    */
1761    
1762    static int
1763    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1764    {
1765    int rc;
1766    
1767    if (use_pcre16)
1768    #ifdef SUPPORT_PCRE16
1769      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1770    #else
1771      rc = PCRE_ERROR_BADMODE;
1772    #endif
1773    else
1774    #ifdef SUPPORT_PCRE8
1775      rc = pcre_fullinfo(re, study, option, ptr);
1776    #else
1777      rc = PCRE_ERROR_BADMODE;
1778    #endif
1779    
1780    if (rc < 0)
1781      {
1782      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1783        use_pcre16? "16" : "", option);
1784      if (rc == PCRE_ERROR_BADMODE)
1785        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1786          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1787      }
1788    
1789    return rc;
1790    }
1791    
1792    
1793    
1794    /*************************************************
1795    *             Swap byte functions                *
1796    *************************************************/
1797    
1798    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1799    value, respectively.
1800    
1801    Arguments:
1802      value        any number
1803    
1804    Returns:       the byte swapped value
1805    */
1806    
1807    static pcre_uint32
1808    swap_uint32(pcre_uint32 value)
1809    {
1810    return ((value & 0x000000ff) << 24) |
1811           ((value & 0x0000ff00) <<  8) |
1812           ((value & 0x00ff0000) >>  8) |
1813           (value >> 24);
1814    }
1815    
1816    static pcre_uint16
1817    swap_uint16(pcre_uint16 value)
1818    {
1819    return (value >> 8) | (value << 8);
1820    }
1821    
1822    
1823    
1824    /*************************************************
1825    *        Flip bytes in a compiled pattern        *
1826    *************************************************/
1827    
1828    /* This function is called if the 'F' option was present on a pattern that is
1829    to be written to a file. We flip the bytes of all the integer fields in the
1830    regex data block and the study block. In 16-bit mode this also flips relevant
1831    bytes in the pattern itself. This is to make it possible to test PCRE's
1832    ability to reload byte-flipped patterns, e.g. those compiled on a different
1833    architecture. */
1834    
1835    static void
1836    regexflip(pcre *ere, pcre_extra *extra)
1837    {
        /* Byte-swaps, in place, the header fields of the compiled pattern and
        (when extra is not NULL) the study block it points to. When 16-bit
        support is compiled in and use_pcre16 is set, it goes on to byte-swap
        the name table and every 16-bit unit of the compiled code, except for
        the 32-byte class bit maps, which are stepped over unchanged. */
1838    REAL_PCRE *re = (REAL_PCRE *)ere;
1839    #ifdef SUPPORT_PCRE16
1840    int op;
        /* ptr and length are taken from the header BEFORE the header fields
        are swapped below, so they describe the name table in native order.
        They are consumed by the first pass of the swap loop further down. */
1841    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1842    int length = re->name_count * re->name_entry_size;
1843    #ifdef SUPPORT_UTF
1844    BOOL utf = (re->options & PCRE_UTF16) != 0;
1845    BOOL utf16_char = FALSE;
1846    #endif /* SUPPORT_UTF */
1847    #endif /* SUPPORT_PCRE16 */
1848
1849    /* Always flip the bytes in the main data block and study blocks. */
1850
1851    re->magic_number = REVERSED_MAGIC_NUMBER;
1852    re->size = swap_uint32(re->size);
1853    re->options = swap_uint32(re->options);
1854    re->flags = swap_uint16(re->flags);
1855    re->top_bracket = swap_uint16(re->top_bracket);
1856    re->top_backref = swap_uint16(re->top_backref);
1857    re->first_char = swap_uint16(re->first_char);
1858    re->req_char = swap_uint16(re->req_char);
1859    re->name_table_offset = swap_uint16(re->name_table_offset);
1860    re->name_entry_size = swap_uint16(re->name_entry_size);
1861    re->name_count = swap_uint16(re->name_count);
1862
        /* NOTE(review): extra->study_data is dereferenced without a NULL
        check - presumably callers only pass an extra block that carries study
        data; confirm against the call site. */
1863    if (extra != NULL)
1864      {
1865      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1866      rsd->size = swap_uint32(rsd->size);
1867      rsd->flags = swap_uint32(rsd->flags);
1868      rsd->minlength = swap_uint32(rsd->minlength);
1869      }
1870
1871    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1872    in the name table, if present, and then in the pattern itself. */
1873
1874    #ifdef SUPPORT_PCRE16
1875    if (!use_pcre16) return;
1876
        /* Each pass first swaps the `length` units left over from the previous
        step (the name table on the first pass, an opcode's operands on later
        passes), then reads and swaps the next opcode and works out how many
        operand units follow it. The only exit is the return at OP_END. */
1877    while(TRUE)
1878      {
1879      /* Swap previous characters. */
1880      while (length-- > 0)
1881        {
1882        *ptr = swap_uint16(*ptr);
1883        ptr++;
1884        }
1885    #ifdef SUPPORT_UTF
          /* After a character-carrying opcode in UTF-16 mode, a high surrogate
          is followed by one more unit that also needs swapping.
          NOTE(review): ptr[-1] has already been byte-swapped by the loop
          above when this test runs, so the 0xd800 mask is applied to the
          swapped value - confirm whether that is intended. */
1886      if (utf16_char)
1887        {
1888        if ((ptr[-1] & 0xfc00) == 0xd800)
1889          {
1890          /* We know that there is only one extra character in UTF-16. */
1891          *ptr = swap_uint16(*ptr);
1892          ptr++;
1893          }
1894        }
1895      utf16_char = FALSE;
1896    #endif /* SUPPORT_UTF */
1897
1898      /* Get next opcode. */
1899
1900      length = 0;
          /* op keeps the native-order opcode for the switch below; the stored
          copy is swapped. */
1901      op = *ptr;
1902      *ptr++ = swap_uint16(op);
1903
1904      switch (op)
1905        {
1906        case OP_END:
1907        return;
1908
1909    #ifdef SUPPORT_UTF
            /* Opcodes whose last operand is a character: in UTF mode, flag
            that a trailing surrogate may need handling, then fall through to
            the default length calculation. */
1910        case OP_CHAR:
1911        case OP_CHARI:
1912        case OP_NOT:
1913        case OP_NOTI:
1914        case OP_STAR:
1915        case OP_MINSTAR:
1916        case OP_PLUS:
1917        case OP_MINPLUS:
1918        case OP_QUERY:
1919        case OP_MINQUERY:
1920        case OP_UPTO:
1921        case OP_MINUPTO:
1922        case OP_EXACT:
1923        case OP_POSSTAR:
1924        case OP_POSPLUS:
1925        case OP_POSQUERY:
1926        case OP_POSUPTO:
1927        case OP_STARI:
1928        case OP_MINSTARI:
1929        case OP_PLUSI:
1930        case OP_MINPLUSI:
1931        case OP_QUERYI:
1932        case OP_MINQUERYI:
1933        case OP_UPTOI:
1934        case OP_MINUPTOI:
1935        case OP_EXACTI:
1936        case OP_POSSTARI:
1937        case OP_POSPLUSI:
1938        case OP_POSQUERYI:
1939        case OP_POSUPTOI:
1940        case OP_NOTSTAR:
1941        case OP_NOTMINSTAR:
1942        case OP_NOTPLUS:
1943        case OP_NOTMINPLUS:
1944        case OP_NOTQUERY:
1945        case OP_NOTMINQUERY:
1946        case OP_NOTUPTO:
1947        case OP_NOTMINUPTO:
1948        case OP_NOTEXACT:
1949        case OP_NOTPOSSTAR:
1950        case OP_NOTPOSPLUS:
1951        case OP_NOTPOSQUERY:
1952        case OP_NOTPOSUPTO:
1953        case OP_NOTSTARI:
1954        case OP_NOTMINSTARI:
1955        case OP_NOTPLUSI:
1956        case OP_NOTMINPLUSI:
1957        case OP_NOTQUERYI:
1958        case OP_NOTMINQUERYI:
1959        case OP_NOTUPTOI:
1960        case OP_NOTMINUPTOI:
1961        case OP_NOTEXACTI:
1962        case OP_NOTPOSSTARI:
1963        case OP_NOTPOSPLUSI:
1964        case OP_NOTPOSQUERYI:
1965        case OP_NOTPOSUPTOI:
1966        if (utf) utf16_char = TRUE;
1967    #endif
1968        /* Fall through. */
1969
1970        default:
            /* Table length includes the opcode itself, which has already been
            swapped, hence the -1. */
1971        length = OP_lengths16[op] - 1;
1972        break;
1973
1974        case OP_CLASS:
1975        case OP_NCLASS:
1976        /* Skip the character bit map. */
1977        ptr += 32/sizeof(pcre_uint16);
1978        length = 0;
1979        break;
1980
1981        case OP_XCLASS:
1982        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
            /* The size field that follows the opcode is read in native order
            here, before it is swapped just below. What remains after
            subtracting (1 + LINK_SIZE + 1) is the count of units still to be
            swapped once the opcode, size field and flags unit are done. */
1983        if (LINK_SIZE > 1)
1984          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1985            - (1 + LINK_SIZE + 1));
1986        else
1987          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1988
1989        /* Reverse the size of the XCLASS instance. */
1990        *ptr = swap_uint16(*ptr);
1991        ptr++;
1992        if (LINK_SIZE > 1)
1993          {
1994          *ptr = swap_uint16(*ptr);
1995          ptr++;
1996          }
1997
            /* op is reused here for the unit that follows the size field; it
            is tested against XCL_MAP to see whether a bit map is present. */
1998        op = *ptr;
1999        *ptr = swap_uint16(op);
2000        ptr++;
2001        if ((op & XCL_MAP) != 0)
2002          {
2003          /* Skip the character bit map. */
2004          ptr += 32/sizeof(pcre_uint16);
2005          length -= 32/sizeof(pcre_uint16);
2006          }
2007        break;
2008        }
2009      }
2010    /* Control should never reach here in 16 bit mode. */
2011    #endif /* SUPPORT_PCRE16 */
2012    }
2013    
2014    
2015    
2016    /*************************************************
2017    *        Check match or recursion limit          *
2018    *************************************************/
2019    
2020    static int
2021    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2022      int start_offset, int options, int *use_offsets, int use_size_offsets,
2023      int flag, unsigned long int *limit, int errnumber, const char *msg)
2024    {
2025    int count;
2026    int min = 0;
2027    int mid = 64;
2028    int max = -1;
2029    
2030    extra->flags |= flag;
2031    
2032    for (;;)
2033      {
2034      *limit = mid;
2035    
2036      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2037        use_offsets, use_size_offsets);
2038    
2039      if (count == errnumber)
2040        {
2041        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2042        min = mid;
2043        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
2044        }
2045    
2046      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
2047                             count == PCRE_ERROR_PARTIAL)
2048        {
2049        if (mid == min + 1)
2050          {
2051          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
2052          break;
2053          }
2054        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
2055        max = mid;
2056        mid = (min + mid)/2;
2057        }
2058      else break;    /* Some other error */
2059      }
2060    
2061    extra->flags &= ~flag;
2062    return count;
2063    }
2064    
2065    
2066    
2067    /*************************************************
2068    *         Case-independent strncmp() function    *
2069    *************************************************/
2070    
2071    /*
2072    Arguments:
2073      s         first string
2074      t         second string
2075      n         number of characters to compare
2076    
2077    Returns:    < 0, = 0, or > 0, according to the comparison
2078    */
2079    
2080    static int
2081    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2082    {
2083    while (n--)
2084      {
2085      int c = tolower(*s++) - tolower(*t++);
2086      if (c) return c;
2087      }
2088    return 0;
2089    }
2090    
2091    
2092    
2093    /*************************************************
2094    *         Check newline indicator                *
2095    *************************************************/
2096    
2097    /* This is used both at compile and run-time to check for <xxx> escapes. Print
2098    a message and return 0 if there is no match.
2099    
2100    Arguments:
2101      p           points after the leading '<'
2102      f           file for error message
2103    
2104    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
2105    */
2106    
2107    static int
2108    check_newline(pcre_uint8 *p, FILE *f)
2109    {
2110    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2111    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2112    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2113    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2114    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2115    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2116    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2117    fprintf(f, "Unknown newline type at: <%s\n", p);
2118    return 0;
2119    }
2120    
2121    
2122    
2123    /*************************************************
2124    *             Usage function                     *
2125    *************************************************/
2126    
/* Write the command-line help text to stdout. Lines for the 16-bit library,
DFA matching and the POSIX interface appear only when the corresponding
support is compiled in, and the readline remark depends on whether a readline
library was linked. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#if defined(SUPPORT_LIBREADLINE) || defined(SUPPORT_LIBEDIT)
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs("  -16      use the 16-bit library\n", stdout);
#endif

fputs("  -b       show compiled code\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -C arg   show a specific compile-time option\n"
      "           and exit with its value. The arg can be:\n", stdout);
fputs("     linksize     internal link size [2, 3, 4]\n"
      "     pcre8        8 bit library support enabled [0, 1]\n"
      "     pcre16       16 bit library support enabled [0, 1]\n"
      "     utf          Unicode Transformation Format supported [0, 1]\n"
      "     ucp          Unicode Properties supported [0, 1]\n"
      "     jit          Just-in-time compiler supported [0, 1]\n"
      "     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n",
      stdout);
fputs("  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n", stdout);
fputs("  -s       force each pattern to be studied at basic level\n"
      "  -s+      force each pattern to be studied, using JIT if available\n"
      "  -s++     ditto, verifying when JIT was actually used\n"
      "  -s+n     force each pattern to be studied, using JIT if available,\n"
      "             where 1 <= n <= 7 selects JIT options\n"
      "  -s++n    ditto, verifying when JIT was actually used\n"
      "  -t       time compilation and execution\n", stdout);
fputs("  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
2177    
2178    
2179    
2180    /*************************************************
2181    *                Main Program                    *
2182    *************************************************/
2183    
2184    /* Read lines from named file or stdin and write to named file or stdout; lines
2185    consist of a regular expression, in delimiters and optionally followed by
2186    options, followed by a set of test data, terminated by an empty line. */
2187    
2188    int main(int argc, char **argv)
2189    {
2190    FILE *infile = stdin;
2191    const char *version;
2192    int options = 0;
2193    int study_options = 0;
2194    int default_find_match_limit = FALSE;
2195    int op = 1;
2196    int timeit = 0;
2197    int timeitm = 0;
2198    int showinfo = 0;
2199    int showstore = 0;
2200    int force_study = -1;
2201    int force_study_options = 0;
2202    int quiet = 0;
2203    int size_offsets = 45;
2204    int size_offsets_max;
2205    int *offsets = NULL;
2206    int debug = 0;
2207    int done = 0;
2208    int all_use_dfa = 0;
2209    int verify_jit = 0;
2210    int yield = 0;
2211    int stack_size;
2212    
2213    #if !defined NOPOSIX
2214    int posix = 0;
2215    #endif
2216    #if !defined NODFA
2217    int *dfa_workspace = NULL;
2218    #endif
2219    
2220    pcre_jit_stack *jit_stack = NULL;
2221    
2222    /* These vectors store, end-to-end, a list of zero-terminated captured
2223    substring names, each list itself being terminated by an empty name. Assume
2224    that 1024 is plenty long enough for the few names we'll be testing. It is
2225    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2226    for the actual memory, to ensure alignment. */
2227    
2228    pcre_uint16 copynames[1024];
2229    pcre_uint16 getnames[1024];
2230    
2231    #ifdef SUPPORT_PCRE16
2232    pcre_uint16 *cn16ptr;
2233    pcre_uint16 *gn16ptr;
2234    #endif
2235    
2236    #ifdef SUPPORT_PCRE8
2237    pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2238    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2239    pcre_uint8 *cn8ptr;
2240    pcre_uint8 *gn8ptr;
2241    #endif
2242    
2243    /* Get buffers from malloc() so that valgrind will check their misuse when
2244    debugging. They grow automatically when very long lines are read. The 16-bit
2245    buffer (buffer16) is obtained only if needed. */
2246    
2247    buffer = (pcre_uint8 *)malloc(buffer_size);
2248    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2249    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2250    
2251    /* The outfile variable is static so that new_malloc can use it. */
2252    
2253    outfile = stdout;
2254    
2255    /* The following _setmode() stuff is some Windows magic that puts stdout in
2256    binary mode, so the runtime library does not turn LF into CRLF. At least,
2257    that's what I've been told: never having used Windows I take this all on trust.
2258    Originally it set 0x8000, but then I was advised that _O_BINARY was better. */
2259    
2260    #if defined(_WIN32) || defined(WIN32)
2261    _setmode( _fileno( stdout ), _O_BINARY );
2262    #endif
2263    
2264    /* Get the version number: both pcre_version() and pcre16_version() give the
2265    same answer. We just need to ensure that we call one that is available. */
2266    
2267    #ifdef SUPPORT_PCRE8
2268    version = pcre_version();
2269    #else
2270    version = pcre16_version();
2271    #endif
2272    
2273    /* Scan options */
2274    
2275    while (argc > 1 && argv[op][0] == '-')
2276      {
2277      pcre_uint8 *endptr;
2278      char *arg = argv[op];
2279    
2280      if (strcmp(arg, "-m") == 0) showstore = 1;
2281      else if (strcmp(arg, "-s") == 0) force_study = 0;
2282    
2283      else if (strncmp(arg, "-s+", 3) == 0)
2284        {
2285        arg += 3;
2286        if (*arg == '+') { arg++; verify_jit = TRUE; }
2287        force_study = 1;
2288        if (*arg == 0)
2289          force_study_options = jit_study_bits[6];
2290        else if (*arg >= '1' && *arg <= '7')
2291          force_study_options = jit_study_bits[*arg - '1'];
2292        else goto BAD_ARG;
2293        }
2294      else if (strcmp(arg, "-16") == 0)
2295        {
2296    #ifdef SUPPORT_PCRE16
2297        use_pcre16 = 1;
2298    #else
2299        printf("** This version of PCRE was built without 16-bit support\n");
2300        exit(1);
2301    #endif
2302        }
2303      else if (strcmp(arg, "-q") == 0) quiet = 1;
2304      else if (strcmp(arg, "-b") == 0) debug = 1;
2305      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2306      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2307      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2308    #if !defined NODFA
2309      else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2310    #endif
2311      else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2312          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2313            *endptr == 0))
2314        {
2315        op++;
2316        argc--;
2317        }
2318      else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2319        {
2320        int both = arg[2] == 0;
2321        int temp;
2322        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2323                         *endptr == 0))
2324          {
2325          timeitm = temp;
2326          op++;
2327          argc--;
2328          }
2329        else timeitm = LOOPREPEAT;
2330        if (both) timeit = timeitm;
2331        }
2332      else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2333          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2334            *endptr == 0))
2335        {
2336    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2337        printf("PCRE: -S not supported on this OS\n");
2338        exit(1);
2339    #else
2340        int rc;
2341        struct rlimit rlim;
2342        getrlimit(RLIMIT_STACK, &rlim);
2343        rlim.rlim_cur = stack_size * 1024 * 1024;
2344        rc = setrlimit(RLIMIT_STACK, &rlim);
2345        if (rc != 0)
2346          {
2347        printf("PCRE: setrlimit() failed with error %d\n", rc);
2348        exit(1);
2349          }
2350        op++;
2351        argc--;
2352    #endif
2353        }
2354    #if !defined NOPOSIX
2355      else if (strcmp(arg, "-p") == 0) posix = 1;
2356    #endif
2357      else if (strcmp(arg, "-C") == 0)
2358        {
2359        int rc;
2360        unsigned long int lrc;
2361    
2362        if (argc > 2)
2363          {
2364          if (strcmp(argv[op + 1], "linksize") == 0)
2365            {
2366            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2367            printf("%d\n", rc);
2368            yield = rc;
2369            goto EXIT;
2370            }
2371          if (strcmp(argv[op + 1], "pcre8") == 0)
2372            {
2373    #ifdef SUPPORT_PCRE8
2374            printf("1\n");
2375            yield = 1;
2376    #else
2377            printf("0\n");
2378            yield = 0;
2379    #endif
2380            goto EXIT;
2381            }
2382          if (strcmp(argv[op + 1], "pcre16") == 0)
2383            {
2384    #ifdef SUPPORT_PCRE16
2385            printf("1\n");
2386            yield = 1;
2387    #else
2388            printf("0\n");
2389            yield = 0;
2390    #endif
2391            goto EXIT;
2392            }
2393          if (strcmp(argv[op + 1], "utf") == 0)
2394            {
2395    #ifdef SUPPORT_PCRE8
2396            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2397            printf("%d\n", rc);
2398            yield = rc;
2399    #else
2400            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2401            printf("%d\n", rc);
2402            yield = rc;
2403    #endif
2404            goto EXIT;
2405            }
2406          if (strcmp(argv[op + 1], "ucp") == 0)
2407            {
2408            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2409            printf("%d\n", rc);
2410            yield = rc;
2411            goto EXIT;
2412            }
2413          if (strcmp(argv[op + 1], "jit") == 0)
2414            {
2415            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2416            printf("%d\n", rc);
2417            yield = rc;
2418            goto EXIT;
2419            }
2420          if (strcmp(argv[op + 1], "newline") == 0)
2421            {
2422            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2423            /* Note that these values are always the ASCII values, even
2424            in EBCDIC environments. CR is 13 and NL is 10. */
2425            printf("%s\n", (rc == 13)? "CR" :
2426              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2427              (rc == -2)? "ANYCRLF" :
2428              (rc == -1)? "ANY" : "???");
2429            goto EXIT;
2430            }
2431          printf("Unknown -C option: %s\n", argv[op + 1]);
2432          goto EXIT;
2433          }
2434    
2435        printf("PCRE version %s\n", version);
2436        printf("Compiled with\n");
2437    
2438    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2439    are set, either both UTFs are supported or both are not supported. */
2440    
2441    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2442        printf("  8-bit and 16-bit support\n");
2443        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2444        if (rc)
2445          printf("  UTF-8 and UTF-16 support\n");
2446        else
2447          printf("  No UTF-8 or UTF-16 support\n");
2448    #elif defined SUPPORT_PCRE8
2449        printf("  8-bit support only\n");
2450        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2451        printf("  %sUTF-8 support\n", rc? "" : "No ");
2452    #else
2453        printf("  16-bit support only\n");
2454        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2455        printf("  %sUTF-16 support\n", rc? "" : "No ");
2456    #endif
2457    
2458        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2459        printf("  %sUnicode properties support\n", rc? "" : "No ");
2460        (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2461        if (rc)
2462          {
2463          const char *arch;
2464          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2465          printf("  Just-in-time compiler support: %s\n", arch);
2466          }
2467        else
2468          printf("  No just-in-time compiler support\n");
2469        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2470        /* Note that these values are always the ASCII values, even
2471        in EBCDIC environments. CR is 13 and NL is 10. */
2472        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2473          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2474          (rc == -2)? "ANYCRLF" :
2475          (rc == -1)? "ANY" : "???");
2476        (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2477        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2478                                         "all Unicode newlines");
2479        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2480        printf("  Internal link size = %d\n", rc);
2481        (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2482        printf("  POSIX malloc threshold = %d\n", rc);
2483        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2484        printf("  Default match limit = %ld\n", lrc);
2485        (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2486        printf("  Default recursion depth limit = %ld\n", lrc);
2487        (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2488        printf("  Match recursion uses %s", rc? "stack" : "heap");
2489        if (showstore)
2490          {
2491          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2492          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2493          }
2494        printf("\n");
2495        goto EXIT;
2496        }
2497      else if (strcmp(arg, "-help") == 0 ||
2498               strcmp(arg, "--help") == 0)
2499        {
2500        usage();
2501        goto EXIT;
2502        }
2503      else
2504        {
2505        BAD_ARG:
2506        printf("** Unknown or malformed option %s\n", arg);
2507        usage();
2508        yield = 1;
2509        goto EXIT;
2510        }
2511      op++;
2512      argc--;
2513      }
2514    
2515    /* Get the store for the offsets vector, and remember what it was */
2516    
2517    size_offsets_max = size_offsets;
2518    offsets = (int *)malloc(size_offsets_max * sizeof(int));
2519    if (offsets == NULL)
2520      {
2521      printf("** Failed to get %d bytes of memory for offsets vector\n",
2522        (int)(size_offsets_max * sizeof(int)));
2523      yield = 1;
2524      goto EXIT;
2525      }
2526    
2527    /* Sort out the input and output files */
2528    
2529    if (argc > 1)
2530      {
2531      infile = fopen(argv[op], INPUT_MODE);
2532      if (infile == NULL)
2533        {
2534        printf("** Failed to open %s\n", argv[op]);
2535        yield = 1;
2536        goto EXIT;
2537        }
2538      }
2539    
2540    if (argc > 2)
2541      {
2542      outfile = fopen(argv[op+1], OUTPUT_MODE);
2543      if (outfile == NULL)
2544        {
2545        printf("** Failed to open %s\n", argv[op+1]);
2546        yield = 1;
2547        goto EXIT;
2548        }
2549      }
2550    
2551    /* Set alternative malloc function */
2552    
2553    #ifdef SUPPORT_PCRE8
2554    pcre_malloc = new_malloc;
2555    pcre_free = new_free;
2556    pcre_stack_malloc = stack_malloc;
2557    pcre_stack_free = stack_free;
2558    #endif
2559    
2560    #ifdef SUPPORT_PCRE16
2561    pcre16_malloc = new_malloc;
2562    pcre16_free = new_free;
2563    pcre16_stack_malloc = stack_malloc;
2564    pcre16_stack_free = stack_free;
2565    #endif
2566    
2567    /* Heading line unless quiet, then prompt for first regex if stdin */
2568    
2569    if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2570    
2571    /* Main loop */
2572    
2573    while (!done)
2574      {
2575      pcre *re = NULL;
2576      pcre_extra *extra = NULL;
2577    
2578    #if !defined NOPOSIX  /* There are still compilers that require no indent */
2579      regex_t preg;
2580      int do_posix = 0;
2581    #endif
2582    
2583      const char *error;
2584      pcre_uint8 *markptr;
2585      pcre_uint8 *p, *pp, *ppp;
2586      pcre_uint8 *to_file = NULL;
2587      const pcre_uint8 *tables = NULL;
2588      unsigned long int get_options;
2589      unsigned long int true_size, true_study_size = 0;
2590      size_t size, regex_gotten_store;
2591      int do_allcaps = 0;
2592      int do_mark = 0;
2593      int do_study = 0;
2594      int no_force_study = 0;
2595      int do_debug = debug;
2596      int do_G = 0;
2597      int do_g = 0;
2598      int do_showinfo = showinfo;
2599      int do_showrest = 0;
2600      int do_showcaprest = 0;
2601      int do_flip = 0;
2602      int erroroffset, len, delimiter, poffset;
2603    
2604    #if !defined NODFA
2605      int dfa_matched = 0;
2606    #endif
2607    
2608      use_utf = 0;
2609      debug_lengths = 1;
2610    
2611      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
2612      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2613      fflush(outfile);
2614    
2615      p = buffer;
2616      while (isspace(*p)) p++;
2617      if (*p == 0) continue;
2618    
2619      /* See if the pattern is to be loaded pre-compiled from a file. */
2620    
2621      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2622        {
2623        pcre_uint32 magic;
2624        pcre_uint8 sbuf[8];
2625        FILE *f;
2626    
2627        p++;
2628        if (*p == '!')
2629          {
2630          do_debug = TRUE;
2631          do_showinfo = TRUE;
2632          p++;
2633          }
2634    
2635        pp = p + (int)strlen((char *)p);
2636        while (isspace(pp[-1])) pp--;
2637        *pp = 0;
2638    
2639        f = fopen((char *)p, "rb");
2640        if (f == NULL)
2641          {
2642          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2643          continue;
2644          }
2645    
2646        first_gotten_store = 0;
2647        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2648    
2649        true_size =
2650          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2651        true_study_size =
2652          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2653    
2654        re = (pcre *)new_malloc(true_size);
2655        regex_gotten_store = first_gotten_store;
2656    
2657        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2658    
2659        magic = ((REAL_PCRE *)re)->magic_number;
2660        if (magic != MAGIC_NUMBER)
2661          {
2662          if (swap_uint32(magic) == MAGIC_NUMBER)
2663            {
2664            do_flip = 1;
2665            }
2666          else
2667            {
2668            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2669            fclose(f);
2670            continue;
2671            }
2672          }
2673    
2674        /* We hide the byte-invert info for little and big endian tests. */
2675        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2676          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
2677    
2678        /* Now see if there is any following study data. */
2679    
2680        if (true_study_size != 0)
2681          {
2682          pcre_study_data *psd;
2683    
2684          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2685          extra->flags = PCRE_EXTRA_STUDY_DATA;
2686    
2687          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2688          extra->study_data = psd;
2689    
2690          if (fread(psd, 1, true_study_size, f) != true_study_size)
2691            {
2692            FAIL_READ:
2693            fprintf(outfile, "Failed to read data from %s\n", p);
2694            if (extra != NULL)
2695              {
2696              PCRE_FREE_STUDY(extra);
2697              }
2698            if (re != NULL) new_free(re);
2699            fclose(f);
2700            continue;
2701            }
2702          fprintf(outfile, "Study data loaded from %s\n", p);
2703          do_study = 1;     /* To get the data output if requested */
2704          }
2705        else fprintf(outfile, "No study data\n");
2706    
2707        /* Flip the necessary bytes. */
2708        if (do_flip)
2709          {
2710          int rc;
2711          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2712          if (rc == PCRE_ERROR_BADMODE)
2713            {
2714            /* Simulate the result of the function call below. */
2715            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2716              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2717            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2718              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2719            continue;
2720            }
2721          }
2722    
2723        /* Need to know if UTF-8 for printing data strings. */
2724    
2725        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2726        use_utf = (get_options & PCRE_UTF8) != 0;
2727    
2728        fclose(f);
2729        goto SHOW_INFO;
2730        }
2731    
2732      /* In-line pattern (the usual case). Get the delimiter and seek the end of
2733      the pattern; if it isn't complete, read more. */
2734    
2735      delimiter = *p++;
2736    
2737      if (isalnum(delimiter) || delimiter == '\\')
2738        {
2739        fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2740        goto SKIP_DATA;
2741        }
2742    
2743      pp = p;
2744      poffset = (int)(p - buffer);
2745    
2746      for(;;)
2747        {
2748        while (*pp != 0)
2749          {
2750          if (*pp == '\\' && pp[1] != 0) pp++;
2751            else if (*pp == delimiter) break;
2752          pp++;
2753          }
2754        if (*pp != 0) break;
2755        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
2756          {
2757          fprintf(outfile, "** Unexpected EOF\n");
2758          done = 1;
2759          goto CONTINUE;
2760          }
2761        if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2762        }
2763    
2764      /* The buffer may have moved while being extended; reset the start of data
2765      pointer to the correct relative point in the buffer. */
2766    
2767      p = buffer + poffset;
2768    
2769      /* If the first character after the delimiter is backslash, make
2770      the pattern end with backslash. This is purely to provide a way
2771      of testing for the error message when a pattern ends with backslash. */
2772    
2773      if (pp[1] == '\\') *pp++ = '\\';
2774    
2775      /* Terminate the pattern at the delimiter, and save a copy of the pattern
2776      for callouts. */
2777    
2778      *pp++ = 0;
2779      strcpy((char *)pbuffer, (char *)p);
2780    
2781      /* Look for options after final delimiter */
2782    
2783      options = 0;
2784      study_options = 0;
2785      log_store = showstore;  /* default from command line */
2786    
2787      while (*pp != 0)
2788        {
2789        switch (*pp++)
2790          {
2791          case 'f': options |= PCRE_FIRSTLINE; break;
2792          case 'g': do_g = 1; break;
2793          case 'i': options |= PCRE_CASELESS; break;
2794          case 'm': options |= PCRE_MULTILINE; break;
2795          case 's': options |= PCRE_DOTALL; break;
2796          case 'x': options |= PCRE_EXTENDED; break;
2797    
2798          case '+':
2799          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2800          break;
2801    
2802          case '=': do_allcaps = 1; break;
2803          case 'A': options |= PCRE_ANCHORED; break;
2804          case 'B': do_debug = 1; break;
2805          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2806          case 'D': do_debug = do_showinfo = 1; break;
2807          case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2808          case 'F': do_flip = 1; break;
2809          case 'G': do_G = 1; break;
2810          case 'I': do_showinfo = 1; break;
2811          case 'J': options |= PCRE_DUPNAMES; break;
2812          case 'K': do_mark = 1; break;
2813          case 'M': log_store = 1; break;
2814          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2815    
2816    #if !defined NOPOSIX
2817          case 'P': do_posix = 1; break;
2818    #endif
2819    
2820          case 'S':
2821          if (do_study == 0)
2822            {
2823            do_study = 1;
2824            if (*pp == '+')
2825              {
2826              if (*(++pp) == '+')
2827                {
2828                verify_jit = TRUE;
2829                pp++;
2830                }
2831              if (*pp >= '1' && *pp <= '7')
2832                study_options |= jit_study_bits[*pp++ - '1'];
2833              else
2834                study_options |= jit_study_bits[6];
2835              }
2836            }
2837          else
2838            {
2839            do_study = 0;
2840            no_force_study = 1;
2841            }
2842          break;
2843    
2844          case 'U': options |= PCRE_UNGREEDY; break;
2845          case 'W': options |= PCRE_UCP; break;
2846          case 'X': options |= PCRE_EXTRA; break;
2847          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2848          case 'Z': debug_lengths = 0; break;
2849          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2850          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2851    
2852          case 'T':
2853          switch (*pp++)
2854            {
2855            case '0': tables = tables0; break;
2856            case '1': tables = tables1; break;
2857    
2858            case '\r':
2859            case '\n':
2860            case ' ':
2861            case 0:
2862            fprintf(outfile, "** Missing table number after /T\n");
2863            goto SKIP_DATA;
2864    
2865            default:
2866            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2867            goto SKIP_DATA;
2868            }
2869          break;
2870    
2871          case 'L':
2872          ppp = pp;
2873          /* The '\r' test here is so that it works on Windows. */
2874          /* The zero (NUL) test is just in case this is an unterminated line. */
2875          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2876          *ppp = 0;
2877          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2878            {
2879            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2880            goto SKIP_DATA;
2881            }
2882          locale_set = 1;
2883          tables = PCRE_MAKETABLES;
2884          pp = ppp;
2885          break;
2886    
2887          case '>':
2888          to_file = pp;
2889          while (*pp != 0) pp++;
2890          while (isspace(pp[-1])) pp--;
2891          *pp = 0;
2892          break;
2893    
2894          case '<':
2895            {
2896            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2897              {
2898              options |= PCRE_JAVASCRIPT_COMPAT;
2899              pp += 3;
2900              }
2901            else
2902              {
2903              int x = check_newline(pp, outfile);
2904              if (x == 0) goto SKIP_DATA;
2905              options |= x;
2906              while (*pp++ != '>');
2907              }
2908            }
2909          break;
2910    
2911          case '\r':                      /* So that it works in Windows */
2912          case '\n':
2913          case ' ':
2914          break;
2915    
2916          default:
2917          fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2918          goto SKIP_DATA;
2919          }
2920        }
2921    
2922      /* Handle compiling via the POSIX interface, which doesn't support the
2923      timing, showing, or debugging options, nor the ability to pass over
2924      local character tables. Neither does it have 16-bit support. */
2925    
2926    #if !defined NOPOSIX
2927      if (posix || do_posix)
2928        {
2929        int rc;
2930        int cflags = 0;
2931    
2932        if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2933        if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2934        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2935        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2936        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2937        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2938        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2939    
2940        first_gotten_store = 0;
2941        rc = regcomp(&preg, (char *)p, cflags);
2942    
2943        /* Compilation failed; go back for another re, skipping to blank line
2944        if non-interactive. */
2945    
2946        if (rc != 0)
2947          {
2948          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2949          fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2950          goto SKIP_DATA;
2951          }
2952        }
2953    
2954      /* Handle compiling via the native interface */
2955    
2956      else
2957    #endif  /* !defined NOPOSIX */
2958    
2959        {
2960        /* In 16-bit mode, convert the input. */
2961    
2962    #ifdef SUPPORT_PCRE16
2963        if (use_pcre16)
2964          {
2965          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2966            {
2967            case -1:
2968            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2969              "converted to UTF-16\n");
2970            goto SKIP_DATA;
2971    
2972            case -2:
2973            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2974              "cannot be converted to UTF-16\n");
2975            goto SKIP_DATA;
2976    
2977            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2978            fprintf(outfile, "**Failed: character value greater than 0xffff "
2979              "cannot be converted to 16-bit in non-UTF mode\n");
2980            goto SKIP_DATA;
2981    
2982            default:
2983            break;
2984            }
2985          p = (pcre_uint8 *)buffer16;
2986          }
2987    #endif
2988    
2989        /* Compile many times when timing */
2990    
2991        if (timeit > 0)
2992          {
2993          register int i;
2994          clock_t time_taken;
2995          clock_t start_time = clock();
2996          for (i = 0; i < timeit; i++)
2997            {
2998            PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2999            if (re != NULL) free(re);
3000            }
3001          time_taken = clock() - start_time;
3002          fprintf(outfile, "Compile time %.4f milliseconds\n",
3003            (((double)time_taken * 1000.0) / (double)timeit) /
3004              (double)CLOCKS_PER_SEC);
3005          }
3006    
3007        first_gotten_store = 0;
3008        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
3009    
3010      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
3011      if non-interactive. */      if non-interactive. */
# Line 464  for (;;) Line 3018  for (;;)
3018          {          {
3019          for (;;)          for (;;)
3020            {            {
3021            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
3022              goto END_OFF;              {
3023                done = 1;
3024                goto CONTINUE;
3025                }
3026            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
3027            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
3028            if (len == 0) break;            if (len == 0) break;
3029            }            }
3030          fprintf(outfile, "\n");          fprintf(outfile, "\n");
3031          }          }
3032        continue;        goto CONTINUE;
3033        }        }
3034    
3035      /* Compilation succeeded; print data if required */      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3036        within the regex; check for this so that we know how to process the data
3037        lines. */
3038    
3039      if (showinfo || do_debug)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3040        {        goto SKIP_DATA;
3041        int first_char, count;      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
3042    
3043        if (debug || do_debug) print_internals(re);      /* Extract the size for possible writing before possibly flipping it,
3044        and remember the store that was got. */
3045    
3046        count = pcre_info(re, &options, &first_char);      true_size = ((REAL_PCRE *)re)->size;
3047        if (count < 0) fprintf(outfile,      regex_gotten_store = first_gotten_store;
3048          "Error %d while reading info\n", count);  
3049        else      /* Output code size information if requested */
3050    
3051        if (log_store)
3052          fprintf(outfile, "Memory allocation (code space): %d\n",
3053            (int)(first_gotten_store -
3054                  sizeof(REAL_PCRE) -
3055                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3056    
3057        /* If -s or /S was present, study the regex to generate additional info to
3058        help with the matching, unless the pattern has the SS option, which
3059        suppresses the effect of /S (used for a few test patterns where studying is
3060        never sensible). */
3061    
3062        if (do_study || (force_study >= 0 && !no_force_study))
3063          {
3064          if (timeit > 0)
3065          {          {
3066          fprintf(outfile, "Identifying subpattern count = %d\n", count);          register int i;
3067          if (options == 0) fprintf(outfile, "No options\n");          clock_t time_taken;
3068            else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",          clock_t start_time = clock();
3069              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          for (i = 0; i < timeit; i++)
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "");  
         if (first_char == -1)  
3070            {            {
3071            fprintf(outfile, "First char at start or follows \\n\n");            PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3072            }            }
3073          else if (first_char < 0)          time_taken = clock() - start_time;
3074            if (extra != NULL)
3075            {            {
3076            fprintf(outfile, "No first char\n");            PCRE_FREE_STUDY(extra);
3077            }            }
3078          else          fprintf(outfile, "  Study time %.4f milliseconds\n",
3079              (((double)time_taken * 1000.0) / (double)timeit) /
3080                (double)CLOCKS_PER_SEC);
3081            }
3082          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3083          if (error != NULL)
3084            fprintf(outfile, "Failed to study: %s\n", error);
3085          else if (extra != NULL)
3086            {
3087            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3088            if (log_store)
3089            {            {
3090            if (isprint(first_char))            size_t jitsize;
3091              fprintf(outfile, "First char = \'%c\'\n", first_char);            if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3092            else                jitsize != 0)
3093              fprintf(outfile, "First char = %d\n", first_char);              fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3094            }            }
3095          }          }
3096        }        }
3097    
3098      /* If /S was present, study the regexp to generate additional info to      /* If /K was present, we set up for handling MARK data. */
     help with the matching. */  
3099    
3100      if (do_study)      if (do_mark)
3101        {        {
3102        if (timeit)        if (extra == NULL)
3103          {          {
3104          register int i;          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3105          clock_t time_taken;          extra->flags = 0;
         clock_t start_time = clock();  
         for (i = 0; i < 4000; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.2f milliseconds\n",  
           ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
3106          }          }
3107          extra->mark = &markptr;
3108          extra->flags |= PCRE_EXTRA_MARK;
3109          }
3110    
3111        extra = pcre_study(re, study_options, &error);      /* Extract and display information from the compiled data if required. */
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
3112    
3113        /* This looks at internal information. A bit kludgy to do it this      SHOW_INFO:
       way, but it is useful for testing. */  
3114    
3115        else if (showinfo || do_debug)      if (do_debug)
3116          {
3117          fprintf(outfile, "------------------------------------------------------------------\n");
3118          PCRE_PRINTINT(re, outfile, debug_lengths);
3119          }
3120    
3121        /* We already have the options in get_options (see above) */
3122    
3123        if (do_showinfo)
3124          {
3125          unsigned long int all_options;
3126          int count, backrefmax, first_char, need_char, okpartial, jchanged,
3127            hascrorlf, maxlookbehind;
3128          int nameentrysize, namecount;
3129          const pcre_uint8 *nametable;
3130    
3131          if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3132              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3133              new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3134              new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3135              new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3136              new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3137              new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3138              new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3139              new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3140              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3141              new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf) +
3142              new_info(re, NULL, PCRE_INFO_MAXLOOKBEHIND, &maxlookbehind)
3143              != 0)
3144            goto SKIP_DATA;
3145    
3146          if (size != regex_gotten_store) fprintf(outfile,
3147            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
3148            (int)size, (int)regex_gotten_store);
3149    
3150          fprintf(outfile, "Capturing subpattern count = %d\n", count);
3151          if (backrefmax > 0)
3152            fprintf(outfile, "Max back reference = %d\n", backrefmax);
3153    
3154          if (namecount > 0)
3155            {
3156            fprintf(outfile, "Named capturing subpatterns:\n");
3157            while (namecount-- > 0)
3158              {
3159    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3160              int imm2_size = use_pcre16 ? 1 : 2;
3161    #else
3162              int imm2_size = IMM2_SIZE;
3163    #endif
3164              int length = (int)STRLEN(nametable + imm2_size);
3165              fprintf(outfile, "  ");
3166              PCHARSV(nametable, imm2_size, length, outfile);
3167              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3168    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3169              fprintf(outfile, "%3d\n", use_pcre16?
3170                 (int)(((PCRE_SPTR16)nametable)[0])
3171                :((int)nametable[0] << 8) | (int)nametable[1]);
3172              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3173    #else
3174              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3175    #ifdef SUPPORT_PCRE8
3176              nametable += nameentrysize;
3177    #else
3178              nametable += nameentrysize * 2;
3179    #endif
3180    #endif
3181              }
3182            }
3183    
3184          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3185          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3186    
3187          all_options = ((REAL_PCRE *)re)->options;
3188          if (do_flip) all_options = swap_uint32(all_options);
3189    
3190          if (get_options == 0) fprintf(outfile, "No options\n");
3191            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3192              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3193              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3194              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3195              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3196              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3197              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3198              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3199              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3200              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3201              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3202              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3203              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3204              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3205              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3206              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3207              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3208              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3209    
3210          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3211    
3212          switch (get_options & PCRE_NEWLINE_BITS)
3213            {
3214            case PCRE_NEWLINE_CR:
3215            fprintf(outfile, "Forced newline sequence: CR\n");
3216            break;
3217    
3218            case PCRE_NEWLINE_LF:
3219            fprintf(outfile, "Forced newline sequence: LF\n");
3220            break;
3221    
3222            case PCRE_NEWLINE_CRLF:
3223            fprintf(outfile, "Forced newline sequence: CRLF\n");
3224            break;
3225    
3226            case PCRE_NEWLINE_ANYCRLF:
3227            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3228            break;
3229    
3230            case PCRE_NEWLINE_ANY:
3231            fprintf(outfile, "Forced newline sequence: ANY\n");
3232            break;
3233    
3234            default:
3235            break;
3236            }
3237    
3238          if (first_char == -1)
3239            {
3240            fprintf(outfile, "First char at start or follows newline\n");
3241            }
3242          else if (first_char < 0)
3243            {
3244            fprintf(outfile, "No first char\n");
3245            }
3246          else
3247            {
3248            const char *caseless =
3249              ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3250              "" : " (caseless)";
3251    
3252            if (PRINTOK(first_char))
3253              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3254            else
3255              {
3256              fprintf(outfile, "First char = ");
3257              pchar(first_char, outfile);
3258              fprintf(outfile, "%s\n", caseless);
3259              }
3260            }
3261    
3262          if (need_char < 0)
3263          {          {
3264          real_pcre_extra *xx = (real_pcre_extra *)extra;          fprintf(outfile, "No need char\n");
3265          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          }
3266            fprintf(outfile, "No starting character set\n");        else
3267            {
3268            const char *caseless =
3269              ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3270              "" : " (caseless)";
3271    
3272            if (PRINTOK(need_char))
3273              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3274            else
3275              {
3276              fprintf(outfile, "Need char = ");
3277              pchar(need_char, outfile);
3278              fprintf(outfile, "%s\n", caseless);
3279              }
3280            }
3281    
3282          if (maxlookbehind > 0)
3283            fprintf(outfile, "Max lookbehind = %d\n", maxlookbehind);
3284    
3285          /* Don't output study size; at present it is in any case a fixed
3286          value, but it varies, depending on the computer architecture, and
3287          so messes up the test suite. (And with the /F option, it might be
3288          flipped.) If study was forced by an external -s, don't show this
3289          information unless -i or -d was also present. This means that, except
3290          when auto-callouts are involved, the output from runs with and without
3291          -s should be identical. */
3292    
3293          if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3294            {
3295            if (extra == NULL)
3296              fprintf(outfile, "Study returned NULL\n");
3297          else          else
3298            {            {
3299            int i;            pcre_uint8 *start_bits = NULL;
3300            int c = 24;            int minlength;
3301            fprintf(outfile, "Starting character set: ");  
3302            for (i = 0; i < 256; i++)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3303                fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3304    
3305              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3306              {              {
3307              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (start_bits == NULL)
3308                  fprintf(outfile, "No set of starting bytes\n");
3309                else
3310                {                {
3311                if (c > 75)                int i;
3312                  {                int c = 24;
3313                  fprintf(outfile, "\n  ");                fprintf(outfile, "Starting byte set: ");
3314                  c = 2;                for (i = 0; i < 256; i++)
                 }  
               if (isprint(i) && i != ' ')  
3315                  {                  {
3316                  fprintf(outfile, "%c ", i);                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
3317                  c += 2;                    {
3318                  }                    if (c > 75)
3319                else                      {
3320                  {                      fprintf(outfile, "\n  ");
3321                  fprintf(outfile, "\\x%02x ", i);                      c = 2;
3322                  c += 5;                      }
3323                      if (PRINTOK(i) && i != ' ')
3324                        {
3325                        fprintf(outfile, "%c ", i);
3326                        c += 2;
3327                        }
3328                      else
3329                        {
3330                        fprintf(outfile, "\\x%02x ", i);
3331                        c += 5;
3332                        }
3333                      }
3334                  }                  }
3335                  fprintf(outfile, "\n");
3336                }                }
3337              }              }
3338            fprintf(outfile, "\n");            }
3339    
3340            /* Show this only if the JIT was set by /S, not by -s. */
3341    
3342            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3343              {
3344              int jit;
3345              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3346                {
3347                if (jit)
3348                  fprintf(outfile, "JIT study was successful\n");
3349                else
3350    #ifdef SUPPORT_JIT
3351                  fprintf(outfile, "JIT study was not successful\n");
3352    #else
3353                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3354    #endif
3355                }
3356            }            }
3357          }          }
3358        }        }
3359      }  
3360        /* If the '>' option was present, we write out the regex to a file, and
3361        that is all. The first 8 bytes of the file are the regex length and then
3362        the study length, in big-endian order. */
3363    
3364        if (to_file != NULL)
3365          {
3366          FILE *f = fopen((char *)to_file, "wb");
3367          if (f == NULL)
3368            {
3369            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
3370            }
3371          else
3372            {
3373            pcre_uint8 sbuf[8];
3374    
3375            if (do_flip) regexflip(re, extra);
3376            sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3377            sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3378            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3379            sbuf[3] = (pcre_uint8)((true_size) & 255);
3380            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3381            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3382            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3383            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3384    
3385            if (fwrite(sbuf, 1, 8, f) < 8 ||
3386                fwrite(re, 1, true_size, f) < true_size)
3387              {
3388              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
3389              }
3390            else
3391              {
3392              fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3393    
3394              /* If there is study data, write it. */
3395    
3396              if (extra != NULL)
3397                {
3398                if (fwrite(extra->study_data, 1, true_study_size, f) <
3399                    true_study_size)
3400                  {
3401                  fprintf(outfile, "Write error on %s: %s\n", to_file,
3402                    strerror(errno));
3403                  }
3404                else fprintf(outfile, "Study data written to %s\n", to_file);
3405                }
3406              }
3407            fclose(f);
3408            }
3409    
3410          new_free(re);
3411          if (extra != NULL)
3412            {
3413            PCRE_FREE_STUDY(extra);
3414            }
3415          if (locale_set)
3416            {
3417            new_free((void *)tables);
3418            setlocale(LC_CTYPE, "C");
3419            locale_set = 0;
3420            }
3421          continue;  /* With next regex */
3422          }
3423        }        /* End of non-POSIX compile */
3424    
3425    /* Read data lines and test them */    /* Read data lines and test them */
3426    
3427    for (;;)    for (;;)
3428      {      {
3429      unsigned char *q;      pcre_uint8 *q;
3430        pcre_uint8 *bptr;
3431        int *use_offsets = offsets;
3432        int use_size_offsets = size_offsets;
3433        int callout_data = 0;
3434        int callout_data_set = 0;
3435      int count, c;      int count, c;
3436      int offsets[30];      int copystrings = 0;
3437      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = default_find_match_limit;
3438        int getstrings = 0;
3439        int getlist = 0;
3440        int gmatched = 0;
3441        int start_offset = 0;
3442        int start_offset_sign = 1;
3443        int g_notempty = 0;
3444        int use_dfa = 0;
3445    
3446        *copynames = 0;
3447        *getnames = 0;
3448    
3449    #ifdef SUPPORT_PCRE16
3450        cn16ptr = copynames;
3451        gn16ptr = getnames;
3452    #endif
3453    #ifdef SUPPORT_PCRE8
3454        cn8ptr = copynames8;
3455        gn8ptr = getnames8;
3456    #endif
3457    
3458        SET_PCRE_CALLOUT(callout);
3459        first_callout = 1;
3460        last_callout_mark = NULL;
3461        callout_extra = 0;
3462        callout_count = 0;
3463        callout_fail_count = 999999;
3464        callout_fail_id = -1;
3465        show_malloc = 0;
3466      options = 0;      options = 0;
3467    
3468      if (infile == stdin) printf("  data> ");      if (extra != NULL) extra->flags &=
3469      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
3470      if (infile != stdin) fprintf(outfile, (char *)buffer);  
3471        len = 0;
3472        for (;;)
3473          {
3474          if (extend_inputline(infile, buffer + len, "data> ") == NULL)
3475            {
3476            if (len > 0)    /* Reached EOF without hitting a newline */
3477              {
3478              fprintf(outfile, "\n");
3479              break;
3480              }
3481            done = 1;
3482            goto CONTINUE;
3483            }
3484          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
3485          len = (int)strlen((char *)buffer);
3486          if (buffer[len-1] == '\n') break;
3487          }
3488    
     len = (int)strlen((char *)buffer);  
3489      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
3490      buffer[len] = 0;      buffer[len] = 0;
3491      if (len == 0) break;      if (len == 0) break;
# Line 603  for (;;) Line 3493  for (;;)
3493      p = buffer;      p = buffer;
3494      while (isspace(*p)) p++;      while (isspace(*p)) p++;
3495    
3496      q = dbuffer;      bptr = q = dbuffer;
3497      while ((c = *p++) != 0)      while ((c = *p++) != 0)
3498        {        {
3499        int i = 0;        int i = 0;
3500        int n = 0;        int n = 0;
3501        if (c == '\\') switch ((c = *p++))  
3502          /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3503          In non-UTF mode, allow the value of the byte to fall through to later,
3504          where values greater than 127 are turned into UTF-8 when running in
3505          16-bit mode. */
3506    
3507          if (c != '\\')
3508            {
3509            if (use_utf)
3510              {
3511              *q++ = c;
3512              continue;
3513              }
3514            }
3515    
3516          /* Handle backslash escapes */
3517    
3518          else switch ((c = *p++))
3519          {          {
3520          case 'a': c =    7; break;          case 'a': c =    7; break;
3521          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 627  for (;;) Line 3534  for (;;)
3534          break;          break;
3535    
3536          case 'x':          case 'x':
3537            if (*p == '{')
3538              {
3539              pcre_uint8 *pt = p;
3540              c = 0;
3541    
3542              /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3543              when isxdigit() is a macro that refers to its argument more than
3544              once. This is banned by the C Standard, but apparently happens in at
3545              least one MacOS environment. */
3546    
3547              for (pt++; isxdigit(*pt); pt++)
3548                {
3549                if (++i == 9)
3550                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3551                                   "using only the first eight.\n");
3552                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3553                }
3554              if (*pt == '}')
3555                {
3556                p = pt + 1;
3557                break;
3558                }
3559              /* Not correct form for \x{...}; fall through */
3560              }
3561    
3562            /* \x without {} always defines just one byte in 8-bit mode. This
3563            allows UTF-8 characters to be constructed byte by byte, and also allows
3564            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3565            Otherwise, pass it down to later code so that it can be turned into
3566            UTF-8 when running in 16-bit mode. */
3567    
3568          c = 0;          c = 0;
3569          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3570            {            {
3571            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3572            p++;            p++;
3573            }            }
3574            if (use_utf)
3575              {
3576              *q++ = c;
3577              continue;
3578              }
3579          break;          break;
3580    
3581          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
3582          p--;          p--;
3583          continue;          continue;
3584    
3585          case 'A':  /* Option setting */          case '>':
3586          options |= PCRE_ANCHORED;          if (*p == '-')
3587              {
3588              start_offset_sign = -1;
3589              p++;
3590              }
3591            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3592            start_offset *= start_offset_sign;
3593            continue;
3594    
3595            case 'A':  /* Option setting */
3596            options |= PCRE_ANCHORED;
3597            continue;
3598    
3599            case 'B':
3600            options |= PCRE_NOTBOL;
3601            continue;
3602    
3603            case 'C':
3604            if (isdigit(*p))    /* Set copy string */
3605              {
3606              while(isdigit(*p)) n = n * 10 + *p++ - '0';
3607              copystrings |= 1 << n;
3608              }
3609            else if (isalnum(*p))
3610              {
3611              READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
3612              }
3613            else if (*p == '+')
3614              {
3615              callout_extra = 1;
3616              p++;
3617              }
3618            else if (*p == '-')
3619              {
3620              SET_PCRE_CALLOUT(NULL);
3621              p++;
3622              }
3623            else if (*p == '!')
3624              {
3625              callout_fail_id = 0;
3626              p++;
3627              while(isdigit(*p))
3628                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3629              callout_fail_count = 0;
3630              if (*p == '!')
3631                {
3632                p++;
3633                while(isdigit(*p))
3634                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3635                }
3636              }
3637            else if (*p == '*')
3638              {
3639              int sign = 1;
3640              callout_data = 0;
3641              if (*(++p) == '-') { sign = -1; p++; }
3642              while(isdigit(*p))
3643                callout_data = callout_data * 10 + *p++ - '0';
3644              callout_data *= sign;
3645              callout_data_set = 1;
3646              }
3647            continue;
3648    
3649    #if !defined NODFA
3650            case 'D':
3651    #if !defined NOPOSIX
3652            if (posix || do_posix)
3653              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3654            else
3655    #endif
3656              use_dfa = 1;
3657            continue;
3658    #endif
3659    
3660    #if !defined NODFA
3661            case 'F':
3662            options |= PCRE_DFA_SHORTEST;
3663            continue;
3664    #endif
3665    
3666            case 'G':
3667            if (isdigit(*p))
3668              {
3669              while(isdigit(*p)) n = n * 10 + *p++ - '0';
3670              getstrings |= 1 << n;
3671              }
3672            else if (isalnum(*p))
3673              {
3674              READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3675              }
3676            continue;
3677    
3678            case 'J':
3679            while(isdigit(*p)) n = n * 10 + *p++ - '0';
3680            if (extra != NULL
3681                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3682                && extra->executable_jit != NULL)
3683              {
3684              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3685              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3686              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3687              }
3688            continue;
3689    
3690            case 'L':
3691            getlist = 1;
3692            continue;
3693    
3694            case 'M':
3695            find_match_limit = 1;
3696            continue;
3697    
3698            case 'N':
3699            if ((options & PCRE_NOTEMPTY) != 0)
3700              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3701            else
3702              options |= PCRE_NOTEMPTY;
3703            continue;
3704    
3705            case 'O':
3706            while(isdigit(*p)) n = n * 10 + *p++ - '0';
3707            if (n > size_offsets_max)
3708              {
3709              size_offsets_max = n;
3710              free(offsets);
3711              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3712              if (offsets == NULL)
3713                {
3714                printf("** Failed to get %d bytes of memory for offsets vector\n",
3715                  (int)(size_offsets_max * sizeof(int)));
3716                yield = 1;
3717                goto EXIT;
3718                }
3719              }
3720            use_size_offsets = n;
3721            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3722              else use_offsets = offsets + size_offsets_max - n;  /* To catch overruns */
3723          continue;          continue;
3724    
3725          case 'B':          case 'P':
3726          options |= PCRE_NOTBOL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3727              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3728          continue;          continue;
3729    
3730          case 'E':          case 'Q':
3731          options |= PCRE_DOLLAR_ENDONLY;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3732            if (extra == NULL)
3733              {
3734              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3735              extra->flags = 0;
3736              }
3737            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3738            extra->match_limit_recursion = n;
3739          continue;          continue;
3740    
3741          case 'I':          case 'q':
3742          options |= PCRE_CASELESS;          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3743            if (extra == NULL)
3744              {
3745              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3746              extra->flags = 0;
3747              }
3748            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3749            extra->match_limit = n;
3750          continue;          continue;
3751    
3752          case 'M':  #if !defined NODFA
3753          options |= PCRE_MULTILINE;          case 'R':
3754            options |= PCRE_DFA_RESTART;
3755          continue;          continue;
3756    #endif
3757    
3758          case 'S':          case 'S':
3759          options |= PCRE_DOTALL;          show_malloc = 1;
3760          continue;          continue;
3761    
3762          case 'O':          case 'Y':
3763 &n