/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 39 by nigel, Sat Feb 24 21:39:13 2007 UTC code/branches/pcre16/pcretest.c revision 814 by ph10, Wed Dec 21 12:05:24 2011 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <stdio.h>  #include <stdio.h>
46  #include <string.h>  #include <string.h>
47  #include <stdlib.h>  #include <stdlib.h>
48  #include <time.h>  #include <time.h>
49  #include <locale.h>  #include <locale.h>
50    #include <errno.h>
51    
52    #ifdef SUPPORT_LIBREADLINE
53    #ifdef HAVE_UNISTD_H
54    #include <unistd.h>
55    #endif
56    #include <readline/readline.h>
57    #include <readline/history.h>
58    #endif
59    
 /* Use the internal info for displaying the results of pcre_study(). */  
60    
61  #include "internal.h"  /* A number of things vary for Windows builds. Originally, pcretest opened its
62    input and output without "b"; then I was told that "b" was needed in some
63    environments, so it was added for release 5.0 to both the input and output. (It
64    makes no difference on Unix-like systems.) Later I was told that it is wrong
65    for the input on Windows. I've now abstracted the modes into two macros that
66    are set here, to make it easier to fiddle with them, and removed "b" from the
67    input mode under Windows. */
68    
69    #if defined(_WIN32) || defined(WIN32)
70    #include <io.h>                /* For _setmode() */
71    #include <fcntl.h>             /* For _O_BINARY */
72    #define INPUT_MODE   "r"
73    #define OUTPUT_MODE  "wb"
74    
75    #ifndef isatty
76    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
77    #endif                         /* though in some environments they seem to   */
78                                   /* be already defined, hence the #ifndefs.    */
79    #ifndef fileno
80    #define fileno _fileno
81    #endif
82    
83    /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85    #ifdef __BORLANDC__
86    #define _setmode(handle, mode) setmode(handle, mode)
87    #endif
88    
89    /* Not Windows */
90    
91    #else
92    #include <sys/time.h>          /* These two includes are needed */
93    #include <sys/resource.h>      /* for setrlimit(). */
94    #define INPUT_MODE   "rb"
95    #define OUTPUT_MODE  "wb"
96    #endif
97    
98    
99    /* We have to include pcre_internal.h because we need the internal info for
100    displaying the results of pcre_study() and we also need to know about the
101    internal macros, structures, and other internal data values; pcretest has
102    "inside information" compared to a program that strictly follows the PCRE API.
103    
104    Although pcre_internal.h does itself include pcre.h, we explicitly include it
105    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106    appropriately for an application, not for building PCRE. */
107    
108    #include "pcre.h"
109    #include "pcre_internal.h"
110    
111    /* The pcre_printint() function, which prints the internal form of a compiled
112    regex, is held in a separate file so that (a) it can be compiled in either
113    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114    when that is compiled in debug mode. */
115    
116    #ifdef SUPPORT_PCRE8
117    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118    #endif
119    #ifdef SUPPORT_PCRE16
120    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121    #endif
122    
123    /* We need access to some of the data tables that PCRE uses. So as not to have
124    to keep two copies, we include the source file here, changing the names of the
125    external symbols to prevent clashes. */
126    
127    #define _pcre_ucp_gentype      ucp_gentype
128    #define _pcre_ucp_typerange    ucp_typerange
129    #define _pcre_utf8_table1      utf8_table1
130    #define _pcre_utf8_table1_size utf8_table1_size
131    #define _pcre_utf8_table2      utf8_table2
132    #define _pcre_utf8_table3      utf8_table3
133    #define _pcre_utf8_table4      utf8_table4
134    #define _pcre_utt              utt
135    #define _pcre_utt_size         utt_size
136    #define _pcre_utt_names        utt_names
137    #define _pcre_OP_lengths       OP_lengths
138    
139    #include "pcre_tables.c"
140    
141    /* The definition of the macro PRINTABLE, which determines whether to print an
142    output character as-is or as a hex value when showing compiled patterns, is
143    the same as in the printint.src file. We use it here in cases when the locale
144    has not been explicitly changed, so as to get consistent output from systems
145    that differ in their output from isprint() even in the "C" locale. */
146    
147    #ifdef EBCDIC
148    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149    #else
150    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151    #endif
152    
153    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
156  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 160  Makefile. */
160  #include "pcreposix.h"  #include "pcreposix.h"
161  #endif  #endif
162    
163    /* It is also possible, originally for the benefit of a version that was
164    imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165    without the interface to the DFA matcher (NODFA), and without the doublecheck
166    of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167    out the UTF8 support if PCRE is built without it. */
168    
169    #ifndef SUPPORT_UTF8
170    #ifndef NOUTF8
171    #define NOUTF8
172    #endif
173    #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    
185    #define PCHARS8(lv, p, len, f) \
186      lv = pchars((pcre_uint8 *)p, len, f)
187    
188    #define PCHARSV8(p, len, f) \
189      (void)pchars((pcre_uint8 *)p, len, f)
190    
191    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
192      re = pcre_compile((char *)pat, options, error, erroffset, tables)
193    
194    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
195        offsets, size_offsets, workspace, size_workspace) \
196      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
197        offsets, size_offsets, workspace, size_workspace)
198    
199    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
200        offsets, size_offsets) \
201      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
202        offsets, size_offsets)
203    
204    #define PCRE_FREE_STUDY8(extra) \
205      pcre_free_study(extra)
206    
207    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
208      pcre_pattern_to_host_byte_order(re, extra, tables)
209    
210    #define PCRE_STUDY8(extra, re, options, error) \
211      extra = pcre_study(re, options, error)
212    
213    #endif /* SUPPORT_PCRE8 */
214    
215    
216    #ifdef SUPPORT_PCRE16
217    
218    #define PCHARS16(lv, p, len, f) \
219      lv = pchars16((PCRE_SPTR16)p, len, f)
220    
221    #define PCHARSV16(p, len, f) \
222      (void)pchars16((PCRE_SPTR16)p, len, f)
223    
224    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
225      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
226    
227    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
228        offsets, size_offsets, workspace, size_workspace) \
229      count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
230        options, offsets, size_offsets, workspace, size_workspace)
231    
232    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
233        offsets, size_offsets) \
234      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
235        options, offsets, size_offsets)
236    
237    #define PCRE_FREE_STUDY16(extra) \
238      pcre16_free_study(extra)
239    
240    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
241      pcre16_pattern_to_host_byte_order(re, extra, tables)
242    
243    #define PCRE_STUDY16(extra, re, options, error) \
244      extra = pcre16_study(re, options, error)
245    
246    #endif /* SUPPORT_PCRE16 */
247    
248    
249    /* ----- Both modes are supported; a runtime test is needed ----- */
250    
251    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
252    
253    #define PCHARS(lv, p, len, f) \
254      if (use_pcre16) \
255        PCHARS16(lv, p, len, f); \
256      else \
257        PCHARS8(lv, p, len, f)
258    
259    #define PCHARSV(p, len, f) \
260      if (use_pcre16) \
261        PCHARSV16(p, len, f); \
262      else \
263        PCHARSV8(p, len, f)
264    
265    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
266      if (use_pcre16) \
267        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
268      else \
269        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
270    
271    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
272        offsets, size_offsets, workspace, size_workspace) \
273      if (use_pcre16) \
274        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
275          offsets, size_offsets, workspace, size_workspace); \
276      else \
277        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
278          offsets, size_offsets, workspace, size_workspace)
279    
280    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
281        offsets, size_offsets) \
282      if (use_pcre16) \
283        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
284          offsets, size_offsets); \
285      else \
286        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
287          offsets, size_offsets)
288    
289    #define PCRE_FREE_STUDY(extra) \
290      if (use_pcre16) \
291        PCRE_FREE_STUDY16(extra); \
292      else \
293        PCRE_FREE_STUDY8(extra)
294    
295    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
296      if (use_pcre16) \
297        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
298      else \
299        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
300    
301    #define PCRE_STUDY(extra, re, options, error) \
302      if (use_pcre16) \
303        PCRE_STUDY16(extra, re, options, error); \
304      else \
305        PCRE_STUDY8(extra, re, options, error)
306    
307    /* ----- Only 8-bit mode is supported ----- */
308    
309    #elif defined SUPPORT_PCRE8
310    #define PCHARS           PCHARS8
311    #define PCHARSV          PCHARSV8
312    #define PCRE_COMPILE     PCRE_COMPILE8
313    #define PCRE_DFA_EXEC    PCRE_DFA_EXEC8
314    #define PCRE_EXEC        PCRE_EXEC8
315    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
316    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
317    #define PCRE_STUDY       PCRE_STUDY8
318    
319    /* ----- Only 16-bit mode is supported ----- */
320    
321    #else
322    #define PCHARS           PCHARS16
323    #define PCHARSV          PCHARSV16
324    #define PCRE_COMPILE     PCRE_COMPILE16
325    #define PCRE_DFA_EXEC    PCRE_DFA_EXEC16
326    #define PCRE_EXEC        PCRE_EXEC16
327    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
328    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
329    #define PCRE_STUDY       PCRE_STUDY16
330    #endif
331    
332    /* ----- End of mode-specific function call macros ----- */
333    
334    
335    /* Other parameters */
336    
337  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
338  #ifdef CLK_TCK  #ifdef CLK_TCK
339  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 342  Makefile. */
342  #endif  #endif
343  #endif  #endif
344    
345  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
346    
347    #define LOOPREPEAT 500000
348    
349    /* Static variables */
350    
351  static FILE *outfile;  static FILE *outfile;
352  static int log_store = 0;  static int log_store = 0;
353    static int callout_count;
354    static int callout_extra;
355    static int callout_fail_count;
356    static int callout_fail_id;
357    static int debug_lengths;
358    static int first_callout;
359    static int locale_set = 0;
360    static int show_malloc;
361    static int use_utf;
362    static size_t gotten_store;
363    static size_t first_gotten_store = 0;
364    static const unsigned char *last_callout_mark = NULL;
365    
366    /* The buffers grow automatically if very long input lines are encountered. */
367    
368    static int buffer_size = 50000;
369    static pcre_uint8 *buffer = NULL;
370    static pcre_uint8 *dbuffer = NULL;
371    static pcre_uint8 *pbuffer = NULL;
372    
373    /* Another buffer is needed for translation to 16-bit character strings. It
374    will be obtained and extended as required. */
375    
376    #ifdef SUPPORT_PCRE16
377    static int buffer16_size = 0;
378    static pcre_uint16 *buffer16 = NULL;
379    
380    /* We need the table of operator lengths that is used for 16-bit compiling, in
381    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
382    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
383    appropriately for the 16-bit world. Just as a safety check, make sure that
384    COMPILE_PCRE16 is *not* set. */
385    
386    #ifdef COMPILE_PCRE16
387    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
388    #endif
389    
390    #if LINK_SIZE == 2
391    #undef LINK_SIZE
392    #define LINK_SIZE 1
393    #elif LINK_SIZE == 3 || LINK_SIZE == 4
394    #undef LINK_SIZE
395    #define LINK_SIZE 2
396    #else
397    #error LINK_SIZE must be either 2, 3, or 4
398    #endif
399    
400    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
401    
402  /* Debugging function to print the internal form of the regex. This is the same  #endif  /* SUPPORT_PCRE16 */
 code as contained in pcre.c under the DEBUG macro. */  
403    
404  static const char *OP_names[] = {  /* If we have 8-bit support, default use_pcre16 to false; if there is also
405    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  16-bit support, it can be changed by an option. If there is no 8-bit support,
406    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  there must be 16-bit support, so default it to 1. */
407    "Opt", "^", "$", "Any", "chars", "not",  
408    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  #ifdef SUPPORT_PCRE8
409    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  static int use_pcre16 = 0;
410    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  #else
411    "*", "*?", "+", "+?", "?", "??", "{", "{",  static int use_pcre16 = 1;
412    "class", "Ref",  #endif
413    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
414    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  /* Textual explanations for runtime error codes */
415    "Brazero", "Braminzero", "Bra"  
416    static const char *errtexts[] = {
417      NULL,  /* 0 is no error */
418      NULL,  /* NOMATCH is handled specially */
419      "NULL argument passed",
420      "bad option value",
421      "magic number missing",
422      "unknown opcode - pattern overwritten?",
423      "no more memory",
424      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
425      "match limit exceeded",
426      "callout error code",
427      NULL,  /* BADUTF8 is handled specially */
428      "bad UTF-8 offset",
429      NULL,  /* PARTIAL is handled specially */
430      "not used - internal error",
431      "internal error - pattern overwritten?",
432      "bad count value",
433      "item unsupported for DFA matching",
434      "backreference condition or recursion test not supported for DFA matching",
435      "match limit not supported for DFA matching",
436      "workspace size exceeded in DFA matching",
437      "too much recursion for DFA matching",
438      "recursion limit exceeded",
439      "not used - internal error",
440      "invalid combination of newline options",
441      "bad offset value",
442      NULL,  /* SHORTUTF8 is handled specially */
443      "nested recursion at the same subject position",
444      "JIT stack limit reached",
445      "pattern compiled in wrong mode (8-bit/16-bit error)"
446  };  };
447    
448    
449  static void print_internals(pcre *re)  /*************************************************
450    *         Alternate character tables             *
451    *************************************************/
452    
453    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
454    using the default tables of the library. However, the T option can be used to
455    select alternate sets of tables, for different kinds of testing. Note also that
456    the L (locale) option also adjusts the tables. */
457    
458    /* This is the set of tables distributed as default with PCRE. It recognizes
459    only ASCII characters. */
460    
461    static const pcre_uint8 tables0[] = {
462    
463    /* This table is a lower casing table. */
464    
465        0,  1,  2,  3,  4,  5,  6,  7,
466        8,  9, 10, 11, 12, 13, 14, 15,
467       16, 17, 18, 19, 20, 21, 22, 23,
468       24, 25, 26, 27, 28, 29, 30, 31,
469       32, 33, 34, 35, 36, 37, 38, 39,
470       40, 41, 42, 43, 44, 45, 46, 47,
471       48, 49, 50, 51, 52, 53, 54, 55,
472       56, 57, 58, 59, 60, 61, 62, 63,
473       64, 97, 98, 99,100,101,102,103,
474      104,105,106,107,108,109,110,111,
475      112,113,114,115,116,117,118,119,
476      120,121,122, 91, 92, 93, 94, 95,
477       96, 97, 98, 99,100,101,102,103,
478      104,105,106,107,108,109,110,111,
479      112,113,114,115,116,117,118,119,
480      120,121,122,123,124,125,126,127,
481      128,129,130,131,132,133,134,135,
482      136,137,138,139,140,141,142,143,
483      144,145,146,147,148,149,150,151,
484      152,153,154,155,156,157,158,159,
485      160,161,162,163,164,165,166,167,
486      168,169,170,171,172,173,174,175,
487      176,177,178,179,180,181,182,183,
488      184,185,186,187,188,189,190,191,
489      192,193,194,195,196,197,198,199,
490      200,201,202,203,204,205,206,207,
491      208,209,210,211,212,213,214,215,
492      216,217,218,219,220,221,222,223,
493      224,225,226,227,228,229,230,231,
494      232,233,234,235,236,237,238,239,
495      240,241,242,243,244,245,246,247,
496      248,249,250,251,252,253,254,255,
497    
498    /* This table is a case flipping table. */
499    
500        0,  1,  2,  3,  4,  5,  6,  7,
501        8,  9, 10, 11, 12, 13, 14, 15,
502       16, 17, 18, 19, 20, 21, 22, 23,
503       24, 25, 26, 27, 28, 29, 30, 31,
504       32, 33, 34, 35, 36, 37, 38, 39,
505       40, 41, 42, 43, 44, 45, 46, 47,
506       48, 49, 50, 51, 52, 53, 54, 55,
507       56, 57, 58, 59, 60, 61, 62, 63,
508       64, 97, 98, 99,100,101,102,103,
509      104,105,106,107,108,109,110,111,
510      112,113,114,115,116,117,118,119,
511      120,121,122, 91, 92, 93, 94, 95,
512       96, 65, 66, 67, 68, 69, 70, 71,
513       72, 73, 74, 75, 76, 77, 78, 79,
514       80, 81, 82, 83, 84, 85, 86, 87,
515       88, 89, 90,123,124,125,126,127,
516      128,129,130,131,132,133,134,135,
517      136,137,138,139,140,141,142,143,
518      144,145,146,147,148,149,150,151,
519      152,153,154,155,156,157,158,159,
520      160,161,162,163,164,165,166,167,
521      168,169,170,171,172,173,174,175,
522      176,177,178,179,180,181,182,183,
523      184,185,186,187,188,189,190,191,
524      192,193,194,195,196,197,198,199,
525      200,201,202,203,204,205,206,207,
526      208,209,210,211,212,213,214,215,
527      216,217,218,219,220,221,222,223,
528      224,225,226,227,228,229,230,231,
529      232,233,234,235,236,237,238,239,
530      240,241,242,243,244,245,246,247,
531      248,249,250,251,252,253,254,255,
532    
533    /* This table contains bit maps for various character classes. Each map is 32
534    bytes long and the bits run from the least significant end of each byte. The
535    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
536    graph, print, punct, and cntrl. Other classes are built from combinations. */
537    
538      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
539      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
540      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
541      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
542    
543      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
544      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
545      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
546      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
547    
548      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
549      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
550      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
551      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
552    
553      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
554      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
555      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
556      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
557    
558      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
559      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
560      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
561      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
562    
563      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
564      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
565      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
566      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
567    
568      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
569      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
570      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
571      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
572    
573      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
574      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
575      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
576      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
577    
578      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
579      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
580      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
581      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
582    
583      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
584      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
585      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
586      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
587    
588    /* This table identifies various classes of character by individual bits:
589      0x01   white space character
590      0x02   letter
591      0x04   decimal digit
592      0x08   hexadecimal digit
593      0x10   alphanumeric or '_'
594      0x80   regular expression metacharacter or binary zero
595    */
596    
597      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
598      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
599      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
600      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
601      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
602      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
603      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
604      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
605      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
606      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
607      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
608      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
609      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
610      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
611      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
612      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
613      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
614      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
615      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
616      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
617      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
618      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
619      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
620      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
621      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
622      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
623      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
624      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
625      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
626      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
627      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
628      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
629    
630    /* This is a set of tables that came originally from a Windows user. It seems to
631    be at least an approximation of ISO 8859. In particular, there are characters
632    greater than 128 that are marked as spaces, letters, etc. */
633    
634    static const pcre_uint8 tables1[] = {
635    0,1,2,3,4,5,6,7,
636    8,9,10,11,12,13,14,15,
637    16,17,18,19,20,21,22,23,
638    24,25,26,27,28,29,30,31,
639    32,33,34,35,36,37,38,39,
640    40,41,42,43,44,45,46,47,
641    48,49,50,51,52,53,54,55,
642    56,57,58,59,60,61,62,63,
643    64,97,98,99,100,101,102,103,
644    104,105,106,107,108,109,110,111,
645    112,113,114,115,116,117,118,119,
646    120,121,122,91,92,93,94,95,
647    96,97,98,99,100,101,102,103,
648    104,105,106,107,108,109,110,111,
649    112,113,114,115,116,117,118,119,
650    120,121,122,123,124,125,126,127,
651    128,129,130,131,132,133,134,135,
652    136,137,138,139,140,141,142,143,
653    144,145,146,147,148,149,150,151,
654    152,153,154,155,156,157,158,159,
655    160,161,162,163,164,165,166,167,
656    168,169,170,171,172,173,174,175,
657    176,177,178,179,180,181,182,183,
658    184,185,186,187,188,189,190,191,
659    224,225,226,227,228,229,230,231,
660    232,233,234,235,236,237,238,239,
661    240,241,242,243,244,245,246,215,
662    248,249,250,251,252,253,254,223,
663    224,225,226,227,228,229,230,231,
664    232,233,234,235,236,237,238,239,
665    240,241,242,243,244,245,246,247,
666    248,249,250,251,252,253,254,255,
667    0,1,2,3,4,5,6,7,
668    8,9,10,11,12,13,14,15,
669    16,17,18,19,20,21,22,23,
670    24,25,26,27,28,29,30,31,
671    32,33,34,35,36,37,38,39,
672    40,41,42,43,44,45,46,47,
673    48,49,50,51,52,53,54,55,
674    56,57,58,59,60,61,62,63,
675    64,97,98,99,100,101,102,103,
676    104,105,106,107,108,109,110,111,
677    112,113,114,115,116,117,118,119,
678    120,121,122,91,92,93,94,95,
679    96,65,66,67,68,69,70,71,
680    72,73,74,75,76,77,78,79,
681    80,81,82,83,84,85,86,87,
682    88,89,90,123,124,125,126,127,
683    128,129,130,131,132,133,134,135,
684    136,137,138,139,140,141,142,143,
685    144,145,146,147,148,149,150,151,
686    152,153,154,155,156,157,158,159,
687    160,161,162,163,164,165,166,167,
688    168,169,170,171,172,173,174,175,
689    176,177,178,179,180,181,182,183,
690    184,185,186,187,188,189,190,191,
691    224,225,226,227,228,229,230,231,
692    232,233,234,235,236,237,238,239,
693    240,241,242,243,244,245,246,215,
694    248,249,250,251,252,253,254,223,
695    192,193,194,195,196,197,198,199,
696    200,201,202,203,204,205,206,207,
697    208,209,210,211,212,213,214,247,
698    216,217,218,219,220,221,222,255,
699    0,62,0,0,1,0,0,0,
700    0,0,0,0,0,0,0,0,
701    32,0,0,0,1,0,0,0,
702    0,0,0,0,0,0,0,0,
703    0,0,0,0,0,0,255,3,
704    126,0,0,0,126,0,0,0,
705    0,0,0,0,0,0,0,0,
706    0,0,0,0,0,0,0,0,
707    0,0,0,0,0,0,255,3,
708    0,0,0,0,0,0,0,0,
709    0,0,0,0,0,0,12,2,
710    0,0,0,0,0,0,0,0,
711    0,0,0,0,0,0,0,0,
712    254,255,255,7,0,0,0,0,
713    0,0,0,0,0,0,0,0,
714    255,255,127,127,0,0,0,0,
715    0,0,0,0,0,0,0,0,
716    0,0,0,0,254,255,255,7,
717    0,0,0,0,0,4,32,4,
718    0,0,0,128,255,255,127,255,
719    0,0,0,0,0,0,255,3,
720    254,255,255,135,254,255,255,7,
721    0,0,0,0,0,4,44,6,
722    255,255,127,255,255,255,127,255,
723    0,0,0,0,254,255,255,255,
724    255,255,255,255,255,255,255,127,
725    0,0,0,0,254,255,255,255,
726    255,255,255,255,255,255,255,255,
727    0,2,0,0,255,255,255,255,
728    255,255,255,255,255,255,255,127,
729    0,0,0,0,255,255,255,255,
730    255,255,255,255,255,255,255,255,
731    0,0,0,0,254,255,0,252,
732    1,0,0,248,1,0,0,120,
733    0,0,0,0,254,255,255,255,
734    0,0,128,0,0,0,128,0,
735    255,255,255,255,0,0,0,0,
736    0,0,0,0,0,0,0,128,
737    255,255,255,255,0,0,0,0,
738    0,0,0,0,0,0,0,0,
739    128,0,0,0,0,0,0,0,
740    0,1,1,0,1,1,0,0,
741    0,0,0,0,0,0,0,0,
742    0,0,0,0,0,0,0,0,
743    1,0,0,0,128,0,0,0,
744    128,128,128,128,0,0,128,0,
745    28,28,28,28,28,28,28,28,
746    28,28,0,0,0,0,0,128,
747    0,26,26,26,26,26,26,18,
748    18,18,18,18,18,18,18,18,
749    18,18,18,18,18,18,18,18,
750    18,18,18,128,128,0,128,16,
751    0,26,26,26,26,26,26,18,
752    18,18,18,18,18,18,18,18,
753    18,18,18,18,18,18,18,18,
754    18,18,18,128,128,0,0,0,
755    0,0,0,0,0,1,0,0,
756    0,0,0,0,0,0,0,0,
757    0,0,0,0,0,0,0,0,
758    0,0,0,0,0,0,0,0,
759    1,0,0,0,0,0,0,0,
760    0,0,18,0,0,0,0,0,
761    0,0,20,20,0,18,0,0,
762    0,20,18,0,0,0,0,0,
763    18,18,18,18,18,18,18,18,
764    18,18,18,18,18,18,18,18,
765    18,18,18,18,18,18,18,0,
766    18,18,18,18,18,18,18,18,
767    18,18,18,18,18,18,18,18,
768    18,18,18,18,18,18,18,18,
769    18,18,18,18,18,18,18,0,
770    18,18,18,18,18,18,18,18
771    };
772    
773    
774    
775    
776    #ifndef HAVE_STRERROR
777    /*************************************************
778    *     Provide strerror() for non-ANSI libraries  *
779    *************************************************/
780    
781    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
782    in their libraries, but can provide the same facility by this simple
783    alternative function. */
784    
extern int   sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): index the system error table when the number is
inside it, otherwise hand back a fixed fallback message. */

char *
strerror(int n)
{
if (n >= 0 && n < sys_nerr) return sys_errlist[n];
return "unknown error number";
}
794    #endif /* HAVE_STRERROR */
795    
796    
797    /*************************************************
798    *         JIT memory callback                    *
799    *************************************************/
800    
801    static pcre_jit_stack* jit_callback(void *arg)
802    {
803    return (pcre_jit_stack *)arg;
804    }
805    
806    
807    /*************************************************
808    *            Convert UTF-8 string to value       *
809    *************************************************/
810    
811    /* This function takes one or more bytes that represents a UTF-8 character,
812    and returns the value of the character.
813    
814    Argument:
815      utf8bytes   a pointer to the byte vector
816      vptr        a pointer to an int to receive the value
817    
818    Returns:      >  0 => the number of bytes consumed
819                  -6 to 0 => malformed UTF-8 character at offset = (-return)
820    */
821    
822    #if !defined NOUTF8
823    
824    static int
825    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
826    {
827    int c = *utf8bytes++;
828    int d = c;
829    int i, j, s;
830    
831    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
832      {
833      if ((d & 0x80) == 0) break;
834      d <<= 1;
835      }
836    
837    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
838    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
839    
840    /* i now has a value in the range 1-5 */
841    
842    s = 6*i;
843    d = (c & utf8_table3[i]) << s;
844    
845    for (j = 0; j < i; j++)
846      {
847      c = *utf8bytes++;
848      if ((c & 0xc0) != 0x80) return -(j+1);
849      s -= 6;
850      d |= (c & 0x3f) << s;
851      }
852    
853    /* Check that encoding was the correct unique one */
854    
855    for (j = 0; j < utf8_table1_size; j++)
856      if (d <= utf8_table1[j]) break;
857    if (j != i) return -(i+1);
858    
859    /* Valid value */
860    
861    *vptr = d;
862    return i+1;
863    }
864    
865    #endif
866    
867    
868    
869    /*************************************************
870    *       Convert character value to UTF-8         *
871    *************************************************/
872    
873    /* This function takes an integer value in the range 0 - 0x7fffffff
874    and encodes it as a UTF-8 character in 1 to 6 bytes.
875    
876    Arguments:
877      cvalue     the character value
878      utf8bytes  pointer to buffer for result - at least 6 bytes long
879    
880    Returns:     number of bytes placed in the buffer
881    */
882    
883    #if !defined NOUTF8
884    
885    static int
886    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
887    {
888    register int i, j;
889    for (i = 0; i < utf8_table1_size; i++)
890      if (cvalue <= utf8_table1[i]) break;
891    utf8bytes += i;
892    for (j = i; j > 0; j--)
893     {
894     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
895     cvalue >>= 6;
896     }
897    *utf8bytes = utf8_table2[i] | cvalue;
898    return i + 1;
899    }
900    
901    #endif
902    
903    
904    
905    #ifdef SUPPORT_PCRE16
906    /*************************************************
907    *         Convert a string to 16-bit             *
908    *************************************************/
909    
910    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
911    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
912    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
913    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
914    result is always left in buffer16.
915    
916    Arguments:
917      p          points to a byte string
918      utf        true if UTF-8 (to be converted to UTF-16)
919      len        number of bytes in the string (excluding trailing zero)
920    
921    Returns:     number of 16-bit data items used (excluding trailing zero)
922                 OR -1 if a UTF-8 string is malformed
923    */
924    
925    static int
926    to16(pcre_uint8 *p, int utf, int len)
927  {  {
928  unsigned char *code = ((real_pcre *)re)->code;  pcre_uint16 *pp;
929    
930    if (buffer16_size < 2*len + 2)
931      {
932      if (buffer16 != NULL) free(buffer16);
933      buffer16_size = 2*len + 2;
934      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
935      if (buffer16 == NULL)
936        {
937        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
938        exit(1);
939        }
940      }
941    
942  fprintf(outfile, "------------------------------------------------------------------\n");  pp = buffer16;
943    
944  for(;;)  if (!utf)
945      {
946      while (len-- > 0) *pp++ = *p++;
947      }
948    
949    else
950    {    {
951    int c;    int c;
952    int charlength;    while (len > 0)
953        {
954        int chlen = utf82ord(p, &c);
955        if (chlen <= 0) return -1;
956        p += chlen;
957        len -= chlen;
958        if (c < 0x10000) *pp++ = c; else
959          {
960          c -= 0x10000;
961          *pp++ = 0xD800 | (c >> 10);
962          *pp++ = 0xDC00 | (c & 0x3ff);
963          }
964        }
965      }
966    
967    *pp = 0;
968    return pp - buffer16;
969    }
970    #endif
971    
972    
973    /*************************************************
974    *        Read or extend an input line            *
975    *************************************************/
976    
977    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  /* Input lines are read into buffer, but both patterns and data lines can be
978    continued over multiple input lines. In addition, if the buffer fills up, we
979    want to automatically expand it so as to be able to handle extremely large
980    lines that are needed for certain stress tests. When the input buffer is
981    expanded, the other two buffers must also be expanded likewise, and the
982    contents of pbuffer, which are a copy of the input for callouts, must be
983    preserved (for when expansion happens for a data line). This is not the most
984    optimal way of handling this, but hey, this is just a test program!
985    
986    Arguments:
987      f            the file to read
988      start        where in buffer to start (this *must* be within buffer)
989      prompt       for stdin or readline()
990    
991    Returns:       pointer to the start of new data
992                   could be a copy of start, or could be moved
993                   NULL if no data read and EOF reached
994    */
995    
996    if (*code >= OP_BRA)  static pcre_uint8 *
997    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
998    {
999    pcre_uint8 *here = start;
1000    
1001    for (;;)
1002      {
1003      int rlen = (int)(buffer_size - (here - buffer));
1004    
1005      if (rlen > 1000)
1006      {      {
1007      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      int dlen;
1008      code += 2;  
1009        /* If libreadline support is required, use readline() to read a line if the
1010        input is a terminal. Note that readline() removes the trailing newline, so
1011        we must put it back again, to be compatible with fgets(). */
1012    
1013    #ifdef SUPPORT_LIBREADLINE
1014        if (isatty(fileno(f)))
1015          {
1016          size_t len;
1017          char *s = readline(prompt);
1018          if (s == NULL) return (here == start)? NULL : start;
1019          len = strlen(s);
1020          if (len > 0) add_history(s);
1021          if (len > rlen - 1) len = rlen - 1;
1022          memcpy(here, s, len);
1023          here[len] = '\n';
1024          here[len+1] = 0;
1025          free(s);
1026          }
1027        else
1028    #endif
1029    
1030        /* Read the next line by normal means, prompting if the file is stdin. */
1031    
1032          {
1033          if (f == stdin) printf("%s", prompt);
1034          if (fgets((char *)here, rlen,  f) == NULL)
1035            return (here == start)? NULL : start;
1036          }
1037    
1038        dlen = (int)strlen((char *)here);
1039        if (dlen > 0 && here[dlen - 1] == '\n') return start;
1040        here += dlen;
1041      }      }
1042    
1043    else switch(*code)    else
1044      {      {
1045      case OP_END:      int new_buffer_size = 2*buffer_size;
1046      fprintf(outfile, "    %s\n", OP_names[*code]);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1047      fprintf(outfile, "------------------------------------------------------------------\n");      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1048      return;      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1049    
1050      case OP_OPT:      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1051      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);        {
1052      code++;        fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1053      break;        exit(1);
1054          }
1055    
1056      case OP_COND:      memcpy(new_buffer, buffer, buffer_size);
1057      fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);      memcpy(new_pbuffer, pbuffer, buffer_size);
     code += 2;  
     break;  
1058    
1059      case OP_CREF:      buffer_size = new_buffer_size;
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
1060    
1061      case OP_CHARS:      start = new_buffer + (start - buffer);
1062      charlength = *(++code);      here = new_buffer + (here - buffer);
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
1063    
1064      case OP_KETRMAX:      free(buffer);
1065      case OP_KETRMIN:      free(dbuffer);
1066      case OP_ALT:      free(pbuffer);
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
1067    
1068      case OP_REVERSE:      buffer = new_buffer;
1069      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      dbuffer = new_dbuffer;
1070      code += 2;      pbuffer = new_pbuffer;
1071      break;      }
1072      }
1073    
1074    return NULL;  /* Control never gets here */
1075    }
1076    
1077    
1078    
1079    /*************************************************
1080    *          Read number from string               *
1081    *************************************************/
1082    
1083    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1084    around with conditional compilation, just do the job by hand. It is only used
1085    for unpicking arguments, so just keep it simple.
1086    
1087    Arguments:
1088      str           string to be converted
1089      endptr        where to put the end pointer
1090    
1091    Returns:        the value, as an int
1092    */
1093    
1094    static int
1095    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1096    {
1097    int result = 0;
1098    while(*str != 0 && isspace(*str)) str++;
1099    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1100    *endptr = str;
1101    return(result);
1102    }
1103    
1104    
1105    
1106    /*************************************************
1107    *             Print one character                *
1108    *************************************************/
1109    
1110    /* Print a single character either literally, or as a hex escape. */
1111    
1112    static int pchar(int c, FILE *f)
1113    {
1114    if (PRINTOK(c))
1115      {
1116      if (f != NULL) fprintf(f, "%c", c);
1117      return 1;
1118      }
1119    
1120    if (c < 0x100)
1121      {
1122      if (use_utf)
1123        {
1124        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1125        return 6;
1126        }
1127      else
1128        {
1129        if (f != NULL) fprintf(f, "\\x%02x", c);
1130        return 4;
1131        }
1132      }
1133    
1134    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1135    return (c <= 0x000000ff)? 6 :
1136           (c <= 0x00000fff)? 7 :
1137           (c <= 0x0000ffff)? 8 :
1138           (c <= 0x000fffff)? 9 : 10;
1139    }
1140    
1141    
1142    
1143    #ifdef SUPPORT_PCRE8
1144    /*************************************************
1145    *         Print 8-bit character string           *
1146    *************************************************/
1147    
1148    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1149    If handed a NULL file, just counts chars without printing. */
1150    
1151    static int pchars(pcre_uint8 *p, int length, FILE *f)
1152    {
1153    int c = 0;
1154    int yield = 0;
1155    
1156    while (length-- > 0)
1157      {
1158    #if !defined NOUTF8
1159      if (use_utf)
1160        {
1161        int rc = utf82ord(p, &c);
1162        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1163          {
1164          length -= rc - 1;
1165          p += rc;
1166          yield += pchar(c, f);
1167          continue;
1168          }
1169        }
1170    #endif
1171      c = *p++;
1172      yield += pchar(c, f);
1173      }
1174    
1175    return yield;
1176    }
1177    #endif
1178    
1179    
1180    
1181    #ifdef SUPPORT_PCRE16
1182    /*************************************************
1183    *           Print 16-bit character string        *
1184    *************************************************/
1185    
1186    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1187    If handed a NULL file, just counts chars without printing. */
1188    
1189    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1190    {
1191    int yield = 0;
1192    
1193    while (length-- > 0)
1194      {
1195      int c = *p++ & 0xffff;
1196    #if !defined NOUTF8
1197      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1198        {
1199        int d = *p & 0xffff;
1200        if (d >= 0xDC00 && d < 0xDFFF)
1201          {
1202          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1203          length--;
1204          p++;
1205          }
1206        }
1207    #endif
1208      yield += pchar(c, f);
1209      }
1210    
1211    return yield;
1212    }
1213    #endif
1214    
1215    
1216    
1217    /*************************************************
1218    *              Callout function                  *
1219    *************************************************/
1220    
1221    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1222    the match. Yield zero unless more callouts than the fail count, or the callout
1223    data is not zero. */
1224    
1225    static int callout(pcre_callout_block *cb)
1226    {
1227    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1228    int i, pre_start, post_start, subject_length;
1229    
1230    if (callout_extra)
1231      {
1232      fprintf(f, "Callout %d: last capture = %d\n",
1233        cb->callout_number, cb->capture_last);
1234    
1235      for (i = 0; i < cb->capture_top * 2; i += 2)
1236        {
1237        if (cb->offset_vector[i] < 0)
1238          fprintf(f, "%2d: <unset>\n", i/2);
1239        else
1240          {
1241          fprintf(f, "%2d: ", i/2);
1242          PCHARSV(cb->subject + cb->offset_vector[i],
1243            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1244          fprintf(f, "\n");
1245          }
1246        }
1247      }
1248    
1249    /* Re-print the subject in canonical form, the first time or if giving full
1250    datails. On subsequent calls in the same match, we use pchars just to find the
1251    printed lengths of the substrings. */
1252    
1253    if (f != NULL) fprintf(f, "--->");
1254    
1255    PCHARS(pre_start, cb->subject, cb->start_match, f);
1256    PCHARS(post_start, cb->subject + cb->start_match,
1257      cb->current_position - cb->start_match, f);
1258    
1259    PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1260    
1261    PCHARSV(cb->subject + cb->current_position,
1262      cb->subject_length - cb->current_position, f);
1263    
1264    if (f != NULL) fprintf(f, "\n");
1265    
1266    /* Always print appropriate indicators, with callout number if not already
1267    shown. For automatic callouts, show the pattern offset. */
1268    
1269    if (cb->callout_number == 255)
1270      {
1271      fprintf(outfile, "%+3d ", cb->pattern_position);
1272      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1273      }
1274    else
1275      {
1276      if (callout_extra) fprintf(outfile, "    ");
1277        else fprintf(outfile, "%3d ", cb->callout_number);
1278      }
1279    
1280    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1281    fprintf(outfile, "^");
1282    
1283    if (post_start > 0)
1284      {
1285      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1286      fprintf(outfile, "^");
1287      }
1288    
1289    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1290      fprintf(outfile, " ");
1291    
1292    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1293      pbuffer + cb->pattern_position);
1294    
1295    fprintf(outfile, "\n");
1296    first_callout = 0;
1297    
1298    if (cb->mark != last_callout_mark)
1299      {
1300      fprintf(outfile, "Latest Mark: %s\n",
1301        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1302      last_callout_mark = cb->mark;
1303      }
1304    
1305    if (cb->callout_data != NULL)
1306      {
1307      int callout_data = *((int *)(cb->callout_data));
1308      if (callout_data != 0)
1309        {
1310        fprintf(outfile, "Callout data = %d\n", callout_data);
1311        return callout_data;
1312        }
1313      }
1314    
1315    return (cb->callout_number != callout_fail_id)? 0 :
1316           (++callout_count >= callout_fail_count)? 1 : 0;
1317    }
1318    
1319    
1320    /*************************************************
1321    *            Local malloc functions              *
1322    *************************************************/
1323    
1324    /* Alternative malloc function, to test functionality and save the size of a
1325    compiled re, which is the first store request that pcre_compile() makes. The
1326    show_malloc variable is set only during matching. */
1327    
1328    static void *new_malloc(size_t size)
1329    {
1330    void *block = malloc(size);
1331    gotten_store = size;
1332    if (first_gotten_store == 0) first_gotten_store = size;
1333    if (show_malloc)
1334      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1335    return block;
1336    }
1337    
1338    static void new_free(void *block)
1339    {
1340    if (show_malloc)
1341      fprintf(outfile, "free             %p\n", block);
1342    free(block);
1343    }
1344    
1345    /* For recursion malloc/free, to test stacking calls */
1346    
1347    static void *stack_malloc(size_t size)
1348    {
1349    void *block = malloc(size);
1350    if (show_malloc)
1351      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1352    return block;
1353    }
1354    
1355    static void stack_free(void *block)
1356    {
1357    if (show_malloc)
1358      fprintf(outfile, "stack_free       %p\n", block);
1359    free(block);
1360    }
1361    
1362    
1363    /*************************************************
1364    *          Call pcre_fullinfo()                  *
1365    *************************************************/
1366    
1367    /* Get one piece of information from the pcre_fullinfo() function. When only
1368    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1369    value, but the code is defensive. */
1370    
1371    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1372    {
1373    int rc;
1374    
1375    if (use_pcre16)
1376    #ifdef SUPPORT_PCRE16
1377      rc = pcre16_fullinfo(re, study, option, ptr);
1378    #else
1379      rc = PCRE_ERROR_BADMODE;
1380    #endif
1381    else
1382    #ifdef SUPPORT_PCRE8
1383      rc = pcre_fullinfo(re, study, option, ptr);
1384    #else
1385      rc = PCRE_ERROR_BADMODE;
1386    #endif
1387    
1388    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1389      use_pcre16? "16" : "", option);
1390    }
1391    
1392    
1393    
1394    /*************************************************
1395    *             Swap byte functions                *
1396    *************************************************/
1397    
1398    /* The following functions swap the bytes of a pcre_uint16
1399    and pcre_uint32 value.
1400    
1401    Arguments:
1402      value        any number
1403    
1404    Returns:       the byte swapped value
1405    */
1406    
1407    static pcre_uint32
1408    swap_uint32(pcre_uint32 value)
1409    {
1410    return ((value & 0x000000ff) << 24) |
1411           ((value & 0x0000ff00) <<  8) |
1412           ((value & 0x00ff0000) >>  8) |
1413           (value >> 24);
1414    }
1415    
1416    static pcre_uint16
1417    swap_uint16(pcre_uint16 value)
1418    {
1419    return (value >> 8) | (value << 8);
1420    }
1421    
1422    
1423    
1424    /*************************************************
1425    *        Flip bytes in a compiled pattern        *
1426    *************************************************/
1427    
1428    /* This function is called if the 'F' option was present on a pattern that is
1429    to be written to a file. We flip the bytes of all the integer fields in the
1430    regex data block and the study block. In 16-bit mode this also flips relevant
1431    bytes in the pattern itself. This is to make it possible to test PCRE's
1432    ability to reload byte-flipped patterns, e.g. those compiled on a different
1433    architecture. */
1434    
1435    static void
1436    regexflip(pcre *ere, pcre_extra *extra)
1437    {
1438    real_pcre *re = (real_pcre *)ere;
1439    int op;
1440    
1441    #ifdef SUPPORT_PCRE16
1442    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1443    int length = re->name_count * re->name_entry_size;
1444    #ifdef SUPPORT_UTF
1445    BOOL utf = (re->options & PCRE_UTF16) != 0;
1446    BOOL utf16_char = FALSE;
1447    #endif /* SUPPORT_UTF */
1448    #endif /* SUPPORT_PCRE16 */
1449    
1450    /* Always flip the bytes in the main data block and study blocks. */
1451    
1452    re->magic_number = REVERSED_MAGIC_NUMBER;
1453    re->size = swap_uint32(re->size);
1454    re->options = swap_uint32(re->options);
1455    re->flags = swap_uint16(re->flags);
1456    re->top_bracket = swap_uint16(re->top_bracket);
1457    re->top_backref = swap_uint16(re->top_backref);
1458    re->first_char = swap_uint16(re->first_char);
1459    re->req_char = swap_uint16(re->req_char);
1460    re->name_table_offset = swap_uint16(re->name_table_offset);
1461    re->name_entry_size = swap_uint16(re->name_entry_size);
1462    re->name_count = swap_uint16(re->name_count);
1463    
1464    if (extra != NULL)
1465      {
1466      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1467      rsd->size = swap_uint32(rsd->size);
1468      rsd->flags = swap_uint32(rsd->flags);
1469      rsd->minlength = swap_uint32(rsd->minlength);
1470      }
1471    
1472    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1473    in the name table, if present, and then in the pattern itself. */
1474    
1475    #ifdef SUPPORT_PCRE16
1476    if (!use_pcre16) return;
1477    
1478    while(TRUE)
1479      {
1480      /* Swap previous characters. */
1481      while (length-- > 0)
1482        {
1483        *ptr = swap_uint16(*ptr);
1484        ptr++;
1485        }
1486    #ifdef SUPPORT_UTF
1487      if (utf16_char)
1488        {
1489        if ((ptr[-1] & 0xfc00) == 0xd800)
1490          {
1491          /* We know that there is only one extra character in UTF-16. */
1492          *ptr = swap_uint16(*ptr);
1493          ptr++;
1494          }
1495        }
1496      utf16_char = FALSE;
1497    #endif /* SUPPORT_UTF */
1498    
1499      /* Get next opcode. */
1500    
1501      length = 0;
1502      op = *ptr;
1503      *ptr++ = swap_uint16(op);
1504    
1505      switch (op)
1506        {
1507        case OP_END:
1508        return;
1509    
1510        case OP_CHAR:
1511        case OP_CHARI:
1512        case OP_NOT:
1513        case OP_NOTI:
1514      case OP_STAR:      case OP_STAR:
1515      case OP_MINSTAR:      case OP_MINSTAR:
1516      case OP_PLUS:      case OP_PLUS:
1517      case OP_MINPLUS:      case OP_MINPLUS:
1518      case OP_QUERY:      case OP_QUERY:
1519      case OP_MINQUERY:      case OP_MINQUERY:
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
1520      case OP_UPTO:      case OP_UPTO:
1521      case OP_MINUPTO:      case OP_MINUPTO:
1522      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);      case OP_EXACT:
1523        else fprintf(outfile, "    \\x%02x{", c);      case OP_POSSTAR:
1524      if (*code != OP_EXACT) fprintf(outfile, ",");      case OP_POSPLUS:
1525      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_POSQUERY:
1526      if (*code == OP_MINUPTO) fprintf(outfile, "?");      case OP_POSUPTO:
1527      code += 3;      case OP_STARI:
1528      break;      case OP_MINSTARI:
1529        case OP_PLUSI:
1530      case OP_TYPEEXACT:      case OP_MINPLUSI:
1531      case OP_TYPEUPTO:      case OP_QUERYI:
1532      case OP_TYPEMINUPTO:      case OP_MINQUERYI:
1533      fprintf(outfile, "    %s{", OP_names[code[3]]);      case OP_UPTOI:
1534      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");      case OP_MINUPTOI:
1535      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_EXACTI:
1536      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");      case OP_POSSTARI:
1537      code += 3;      case OP_POSPLUSI:
1538      break;      case OP_POSQUERYI:
1539        case OP_POSUPTOI:
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
1540      case OP_NOTSTAR:      case OP_NOTSTAR:
1541      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
1542      case OP_NOTPLUS:      case OP_NOTPLUS:
1543      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
1544      case OP_NOTQUERY:      case OP_NOTQUERY:
1545      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
1546      case OP_NOTUPTO:      case OP_NOTUPTO:
1547      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
1548      if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);      case OP_NOTEXACT:
1549        else fprintf(outfile, "    [^\\x%02x]{", c);      case OP_NOTPOSSTAR:
1550      if (*code != OP_NOTEXACT) fprintf(outfile, ",");      case OP_NOTPOSPLUS:
1551      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);      case OP_NOTPOSQUERY:
1552      if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");      case OP_NOTPOSUPTO:
1553      code += 3;      case OP_NOTSTARI:
1554        case OP_NOTMINSTARI:
1555        case OP_NOTPLUSI:
1556        case OP_NOTMINPLUSI:
1557        case OP_NOTQUERYI:
1558        case OP_NOTMINQUERYI:
1559        case OP_NOTUPTOI:
1560        case OP_NOTMINUPTOI:
1561        case OP_NOTEXACTI:
1562        case OP_NOTPOSSTARI:
1563        case OP_NOTPOSPLUSI:
1564        case OP_NOTPOSQUERYI:
1565        case OP_NOTPOSUPTOI:
1566    #ifdef SUPPORT_UTF
1567        if (utf) utf16_char = TRUE;
1568    #endif
1569        length = OP_lengths16[op] - 1;
1570      break;      break;
1571    
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
1572      case OP_CLASS:      case OP_CLASS:
1573        {      case OP_NCLASS:
1574        int i, min, max;      /* Skip the character bit map. */
1575        code++;      ptr += 32/sizeof(pcre_uint16);
1576        fprintf(outfile, "    [");      length = 0;
1577        break;
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
1578    
1579        switch(*code)      case OP_XCLASS:
1580          {      /* Reverse the size of the XCLASS instance. */
1581          case OP_CRSTAR:      ptr++;
1582          case OP_CRMINSTAR:      *ptr = swap_uint16(*ptr);
1583          case OP_CRPLUS:      if (LINK_SIZE > 1)
1584          case OP_CRMINPLUS:        {
1585          case OP_CRQUERY:        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1586          case OP_CRMINQUERY:        ptr++;
1587          fprintf(outfile, "%s", OP_names[*code]);        *ptr = swap_uint16(*ptr);
1588          break;        }
1589        ptr++;
1590    
1591          case OP_CRRANGE:      if (LINK_SIZE > 1)
1592          case OP_CRMINRANGE:        length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1593          min = (code[1] << 8) + code[2];          (1 + LINK_SIZE + 1);
1594          max = (code[3] << 8) + code[4];      else
1595          if (max == 0) fprintf(outfile, "{%d,}", min);        length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
1596    
1597          default:      op = *ptr;
1598          code--;      *ptr = swap_uint16(op);
1599          }      if ((op & XCL_MAP) != 0)
1600          {
1601          /* Skip the character bit map. */
1602          ptr += 32/sizeof(pcre_uint16);
1603          length -= 32/sizeof(pcre_uint16);
1604        }        }
1605      break;      break;
1606    
     /* Anything else is just a one-node item */  
   
1607      default:      default:
1608      fprintf(outfile, "    %s", OP_names[*code]);      length = OP_lengths16[op] - 1;
1609      break;      break;
1610      }      }
1611      }
1612    /* Control should never reach here in 16 bit mode. */
1613    #endif /* SUPPORT_PCRE16 */
1614    }
1615    
1616    
1617    
1618    /*************************************************
1619    *        Check match or recursion limit          *
1620    *************************************************/
1621    
1622    static int
1623    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1624      int start_offset, int options, int *use_offsets, int use_size_offsets,
1625      int flag, unsigned long int *limit, int errnumber, const char *msg)
1626    {
1627    int count;
1628    int min = 0;
1629    int mid = 64;
1630    int max = -1;
1631    
1632    extra->flags |= flag;
1633    
1634    code++;  for (;;)
1635    fprintf(outfile, "\n");    {
1636      *limit = mid;
1637    
1638      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1639        use_offsets, use_size_offsets);
1640    
1641      if (count == errnumber)
1642        {
1643        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1644        min = mid;
1645        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1646        }
1647    
1648      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1649                             count == PCRE_ERROR_PARTIAL)
1650        {
1651        if (mid == min + 1)
1652          {
1653          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1654          break;
1655          }
1656        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1657        max = mid;
1658        mid = (min + mid)/2;
1659        }
1660      else break;    /* Some other error */
1661    }    }
1662    
1663    extra->flags &= ~flag;
1664    return count;
1665  }  }
1666    
1667    
1668    
1669  /* Character string printing function. */  /*************************************************
1670    *         Case-independent strncmp() function    *
1671    *************************************************/
1672    
1673  static void pchars(unsigned char *p, int length)  /*
1674    Arguments:
1675      s         first string
1676      t         second string
1677      n         number of characters to compare
1678    
1679    Returns:    < 0, = 0, or > 0, according to the comparison
1680    */
1681    
1682    static int
1683    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1684  {  {
1685  int c;  while (n--)
1686  while (length-- > 0)    {
1687    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    int c = tolower(*s++) - tolower(*t++);
1688      else fprintf(outfile, "\\x%02x", c);    if (c) return c;
1689      }
1690    return 0;
1691  }  }
1692    
1693    
1694    
1695  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
1696  compiled re. */  *         Check newline indicator                *
1697    *************************************************/
1698    
1699  static void *new_malloc(size_t size)  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1700    a message and return 0 if there is no match.
1701    
1702    Arguments:
1703      p           points after the leading '<'
1704      f           file for error message
1705    
1706    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1707    */
1708    
1709    static int
1710    check_newline(pcre_uint8 *p, FILE *f)
1711    {
1712    if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1713    if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1714    if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1715    if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1716    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1717    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1718    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1719    fprintf(f, "Unknown newline type at: <%s\n", p);
1720    return 0;
1721    }
1722    
1723    
1724    
1725    /*************************************************
1726    *             Usage function                     *
1727    *************************************************/
1728    
1729    static void
1730    usage(void)
1731  {  {
1732  if (log_store)  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1733    fprintf(outfile, "Memory allocation (code space): %d\n",  printf("Input and output default to stdin and stdout.\n");
1734      (int)((int)size - offsetof(real_pcre, code[0])));  #ifdef SUPPORT_LIBREADLINE
1735  return malloc(size);  printf("If input is a terminal, readline() is used to read from it.\n");
1736    #else
1737    printf("This version of pcretest is not linked with readline().\n");
1738    #endif
1739    printf("\nOptions:\n");
1740    #ifdef SUPPORT_PCRE16
1741    printf("  -16      use 16-bit interface\n");
1742    #endif
1743    printf("  -b       show compiled code (bytecode)\n");
1744    printf("  -C       show PCRE compile-time options and exit\n");
1745    printf("  -d       debug: show compiled code and information (-b and -i)\n");
1746    #if !defined NODFA
1747    printf("  -dfa     force DFA matching for all subjects\n");
1748    #endif
1749    printf("  -help    show usage information\n");
1750    printf("  -i       show information about compiled patterns\n"
1751           "  -M       find MATCH_LIMIT minimum for each subject\n"
1752           "  -m       output memory used information\n"
1753           "  -o <n>   set size of offsets vector to <n>\n");
1754    #if !defined NOPOSIX
1755    printf("  -p       use POSIX interface\n");
1756    #endif
1757    printf("  -q       quiet: do not output PCRE version number at start\n");
1758    printf("  -S <n>   set stack size to <n> megabytes\n");
1759    printf("  -s       force each pattern to be studied at basic level\n"
1760           "  -s+      force each pattern to be studied, using JIT if available\n"
1761           "  -t       time compilation and execution\n");
1762    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1763    printf("  -tm      time execution (matching) only\n");
1764    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
1765  }  }
1766    
1767    
1768    
1769    /*************************************************
1770    *                Main Program                    *
1771    *************************************************/
1772    
1773  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
1774  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
1775  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 298  int main(int argc, char **argv) Line 1779  int main(int argc, char **argv)
1779  FILE *infile = stdin;  FILE *infile = stdin;
1780  int options = 0;  int options = 0;
1781  int study_options = 0;  int study_options = 0;
1782    int default_find_match_limit = FALSE;
1783  int op = 1;  int op = 1;
1784  int timeit = 0;  int timeit = 0;
1785    int timeitm = 0;
1786  int showinfo = 0;  int showinfo = 0;
1787  int showstore = 0;  int showstore = 0;
1788    int force_study = -1;
1789    int force_study_options = 0;
1790    int quiet = 0;
1791    int size_offsets = 45;
1792    int size_offsets_max;
1793    int *offsets = NULL;
1794    #if !defined NOPOSIX
1795  int posix = 0;  int posix = 0;
1796    #endif
1797  int debug = 0;  int debug = 0;
1798  int done = 0;  int done = 0;
1799  unsigned char buffer[30000];  int all_use_dfa = 0;
1800  unsigned char dbuffer[1024];  int yield = 0;
1801    int stack_size;
1802    
1803    pcre_jit_stack *jit_stack = NULL;
1804    
1805    /* These vectors store, end-to-end, a list of captured substring names. Assume
1806    that 1024 is plenty long enough for the few names we'll be testing. */
1807    
1808    pcre_uchar copynames[1024];
1809    pcre_uchar getnames[1024];
1810    
1811    pcre_uchar *copynamesptr;
1812    pcre_uchar *getnamesptr;
1813    
1814    /* Get buffers from malloc() so that valgrind will check their misuse when
1815    debugging. They grow automatically when very long lines are read. The 16-bit
1816    buffer (buffer16) is obtained only if needed. */
1817    
1818  /* Static so that new_malloc can use it. */  buffer = (pcre_uint8 *)malloc(buffer_size);
1819    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1820    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1821    
1822    /* The outfile variable is static so that new_malloc can use it. */
1823    
1824  outfile = stdout;  outfile = stdout;
1825    
1826    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1827    library to translate CRLF into a single LF character. At least, that's what
1828    I've been told: never having used Windows I take this all on trust. Originally
1829    it set 0x8000, but then I was advised that _O_BINARY was better. */
1830    
1831    #if defined(_WIN32) || defined(WIN32)
1832    _setmode( _fileno( stdout ), _O_BINARY );
1833    #endif
1834    
1835  /* Scan options */  /* Scan options */
1836    
1837  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1838    {    {
1839    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    pcre_uint8 *endptr;
1840      showstore = 1;  
1841    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    if (strcmp(argv[op], "-m") == 0) showstore = 1;
1842      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1843      else if (strcmp(argv[op], "-s+") == 0)
1844        {
1845        force_study = 1;
1846        force_study_options = PCRE_STUDY_JIT_COMPILE;
1847        }
1848    #ifdef SUPPORT_PCRE16
1849      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1850    #endif
1851    
1852      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1853      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1854    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1855    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1856      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1857    #if !defined NODFA
1858      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1859    #endif
1860      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1861          ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1862            *endptr == 0))
1863        {
1864        op++;
1865        argc--;
1866        }
1867      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1868        {
1869        int both = argv[op][2] == 0;
1870        int temp;
1871        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1872                         *endptr == 0))
1873          {
1874          timeitm = temp;
1875          op++;
1876          argc--;
1877          }
1878        else timeitm = LOOPREPEAT;
1879        if (both) timeit = timeitm;
1880        }
1881      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1882          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1883            *endptr == 0))
1884        {
1885    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1886        printf("PCRE: -S not supported on this OS\n");
1887        exit(1);
1888    #else
1889        int rc;
1890        struct rlimit rlim;
1891        getrlimit(RLIMIT_STACK, &rlim);
1892        rlim.rlim_cur = stack_size * 1024 * 1024;
1893        rc = setrlimit(RLIMIT_STACK, &rlim);
1894        if (rc != 0)
1895          {
1896        printf("PCRE: setrlimit() failed with error %d\n", rc);
1897        exit(1);
1898          }
1899        op++;
1900        argc--;
1901    #endif
1902        }
1903    #if !defined NOPOSIX
1904    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1905    #endif
1906      else if (strcmp(argv[op], "-C") == 0)
1907        {
1908        int rc;
1909        unsigned long int lrc;
1910        printf("PCRE version %s\n", pcre_version());
1911        printf("Compiled with\n");
1912    
1913    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1914    are set, either both UTFs are supported or both are not supported. */
1915    
1916    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1917        printf("  8-bit and 16-bit support\n");
1918        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1919        if (rc)
1920          printf("  UTF-8 and UTF-16 support\n");
1921        else
1922          printf("  No UTF-8 or UTF-16 support\n");
1923    #elif defined SUPPORT_PCRE8
1924        printf("  8-bit support only\n");
1925        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1926        printf("  %sUTF-8 support\n", rc? "" : "No ");
1927    #else
1928        printf("  16-bit support only\n");
1929        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1930        printf("  %sUTF-16 support\n", rc? "" : "No ");
1931    #endif
1932    
1933        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1934        printf("  %sUnicode properties support\n", rc? "" : "No ");
1935        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1936        if (rc)
1937          printf("  Just-in-time compiler support\n");
1938        else
1939          printf("  No just-in-time compiler support\n");
1940        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1941        /* Note that these values are always the ASCII values, even
1942        in EBCDIC environments. CR is 13 and NL is 10. */
1943        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1944          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1945          (rc == -2)? "ANYCRLF" :
1946          (rc == -1)? "ANY" : "???");
1947        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1948        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1949                                         "all Unicode newlines");
1950        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1951        printf("  Internal link size = %d\n", rc);
1952        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1953        printf("  POSIX malloc threshold = %d\n", rc);
1954        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1955        printf("  Default match limit = %ld\n", lrc);
1956        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1957        printf("  Default recursion depth limit = %ld\n", lrc);
1958        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1959        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1960        goto EXIT;
1961        }
1962      else if (strcmp(argv[op], "-help") == 0 ||
1963               strcmp(argv[op], "--help") == 0)
1964        {
1965        usage();
1966        goto EXIT;
1967        }
1968    else    else
1969      {      {
1970      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1971      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1972      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
1973             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
1974      }      }
1975    op++;    op++;
1976    argc--;    argc--;
1977    }    }
1978    
1979    /* Get the store for the offsets vector, and remember what it was */
1980    
1981    size_offsets_max = size_offsets;
1982    offsets = (int *)malloc(size_offsets_max * sizeof(int));
1983    if (offsets == NULL)
1984      {
1985      printf("** Failed to get %d bytes of memory for offsets vector\n",
1986        (int)(size_offsets_max * sizeof(int)));
1987      yield = 1;
1988      goto EXIT;
1989      }
1990    
1991  /* Sort out the input and output files */  /* Sort out the input and output files */
1992    
1993  if (argc > 1)  if (argc > 1)
1994    {    {
1995    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1996    if (infile == NULL)    if (infile == NULL)
1997      {      {
1998      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1999      return 1;      yield = 1;
2000        goto EXIT;
2001      }      }
2002    }    }
2003    
2004  if (argc > 2)  if (argc > 2)
2005    {    {
2006    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
2007    if (outfile == NULL)    if (outfile == NULL)
2008      {      {
2009      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
2010      return 1;      yield = 1;
2011        goto EXIT;
2012      }      }
2013    }    }
2014    
2015  /* Set alternative malloc function */  /* Set alternative malloc function */
2016    
2017    #ifdef SUPPORT_PCRE8
2018  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2019    pcre_free = new_free;
2020    pcre_stack_malloc = stack_malloc;
2021    pcre_stack_free = stack_free;
2022    #endif
2023    
2024    #ifdef SUPPORT_PCRE16
2025    pcre16_malloc = new_malloc;
2026    pcre16_free = new_free;
2027    pcre16_stack_malloc = stack_malloc;
2028    pcre16_stack_free = stack_free;
2029    #endif
2030    
2031  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2032    
2033  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
2034    
2035  /* Main loop */  /* Main loop */
2036    
# Line 376  while (!done) Line 2041  while (!done)
2041    
2042  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
2043    regex_t preg;    regex_t preg;
2044      int do_posix = 0;
2045  #endif  #endif
2046    
2047    const char *error;    const char *error;
2048    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2049    unsigned const char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
2050      pcre_uint8 *to_file = NULL;
2051      const pcre_uint8 *tables = NULL;
2052      unsigned long int true_size, true_study_size = 0;
2053      size_t size, regex_gotten_store;
2054      int do_allcaps = 0;
2055      int do_mark = 0;
2056    int do_study = 0;    int do_study = 0;
2057      int no_force_study = 0;
2058    int do_debug = debug;    int do_debug = debug;
2059    int do_G = 0;    int do_G = 0;
2060    int do_g = 0;    int do_g = 0;
2061    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2062    int do_showrest = 0;    int do_showrest = 0;
2063    int do_posix = 0;    int do_showcaprest = 0;
2064    int erroroffset, len, delimiter;    int do_flip = 0;
2065      int erroroffset, len, delimiter, poffset;
2066    
2067      use_utf = 0;
2068      debug_lengths = 1;
2069    
2070    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
2071    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2072      fflush(outfile);
2073    
2074    p = buffer;    p = buffer;
2075    while (isspace(*p)) p++;    while (isspace(*p)) p++;
2076    if (*p == 0) continue;    if (*p == 0) continue;
2077    
2078    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
2079    complete, read more. */  
2080      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2081        {
2082        unsigned long int magic, get_options;
2083        pcre_uint8 sbuf[8];
2084        FILE *f;
2085    
2086        p++;
2087        pp = p + (int)strlen((char *)p);
2088        while (isspace(pp[-1])) pp--;
2089        *pp = 0;
2090    
2091        f = fopen((char *)p, "rb");
2092        if (f == NULL)
2093          {
2094          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
2095          continue;
2096          }
2097    
2098        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2099    
2100        true_size =
2101          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
2102        true_study_size =
2103          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2104    
2105        re = (real_pcre *)new_malloc(true_size);
2106        regex_gotten_store = first_gotten_store;
2107    
2108        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2109    
2110        magic = ((real_pcre *)re)->magic_number;
2111        if (magic != MAGIC_NUMBER)
2112          {
2113          if (swap_uint32(magic) == MAGIC_NUMBER)
2114            {
2115            do_flip = 1;
2116            }
2117          else
2118            {
2119            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
2120            fclose(f);
2121            continue;
2122            }
2123          }
2124    
2125        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2126          do_flip? " (byte-inverted)" : "", p);
2127    
2128        /* Now see if there is any following study data. */
2129    
2130        if (true_study_size != 0)
2131          {
2132          pcre_study_data *psd;
2133    
2134          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
2135          extra->flags = PCRE_EXTRA_STUDY_DATA;
2136    
2137          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
2138          extra->study_data = psd;
2139    
2140          if (fread(psd, 1, true_study_size, f) != true_study_size)
2141            {
2142            FAIL_READ:
2143            fprintf(outfile, "Failed to read data from %s\n", p);
2144            if (extra != NULL)
2145              {
2146              PCRE_FREE_STUDY(extra);
2147              }
2148            if (re != NULL) new_free(re);
2149            fclose(f);
2150            continue;
2151            }
2152          fprintf(outfile, "Study data loaded from %s\n", p);
2153          do_study = 1;     /* To get the data output if requested */
2154          }
2155        else fprintf(outfile, "No study data\n");
2156    
2157        /* Flip the necessary bytes. */
2158        if (do_flip)
2159          {
2160          PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2161          }
2162    
2163        /* Need to know if UTF-8 for printing data strings */
2164    
2165        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2166        use_utf = (get_options & PCRE_UTF8) != 0;
2167    
2168        fclose(f);
2169        goto SHOW_INFO;
2170        }
2171    
2172      /* In-line pattern (the usual case). Get the delimiter and seek the end of
2173      the pattern; if it isn't complete, read more. */
2174    
2175    delimiter = *p++;    delimiter = *p++;
2176    
2177    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2178      {      {
2179      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2180      goto SKIP_DATA;      goto SKIP_DATA;
2181      }      }
2182    
2183    pp = p;    pp = p;
2184      poffset = (int)(p - buffer);
2185    
2186    for(;;)    for(;;)
2187      {      {
# Line 420  while (!done) Line 2192  while (!done)
2192        pp++;        pp++;
2193        }        }
2194      if (*pp != 0) break;      if (*pp != 0) break;
2195        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
2196        {        {
2197        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2198        done = 1;        done = 1;
# Line 438  while (!done) Line 2201  while (!done)
2201      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
2202      }      }
2203    
2204      /* The buffer may have moved while being extended; reset the start of data
2205      pointer to the correct relative point in the buffer. */
2206    
2207      p = buffer + poffset;
2208    
2209    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
2210    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
2211    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
2212    
2213    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
2214    
2215    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
2216      for callouts. */
2217    
2218    *pp++ = 0;    *pp++ = 0;
2219      strcpy((char *)pbuffer, (char *)p);
2220    
2221    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2222    
# Line 458  while (!done) Line 2228  while (!done)
2228      {      {
2229      switch (*pp++)      switch (*pp++)
2230        {        {
2231          case 'f': options |= PCRE_FIRSTLINE; break;
2232        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
2233        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
2234        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
2235        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2236        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2237    
2238        case '+': do_showrest = 1; break;        case '+':
2239          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2240          break;
2241    
2242          case '=': do_allcaps = 1; break;
2243        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2244          case 'B': do_debug = 1; break;
2245          case 'C': options |= PCRE_AUTO_CALLOUT; break;
2246        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
2247        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
2248          case 'F': do_flip = 1; break;
2249        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2250        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2251          case 'J': options |= PCRE_DUPNAMES; break;
2252          case 'K': do_mark = 1; break;
2253        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2254          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2255    
2256  #if !defined NOPOSIX  #if !defined NOPOSIX
2257        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2258  #endif  #endif
2259    
2260        case 'S': do_study = 1; break;        case 'S':
2261          if (do_study == 0)
2262            {
2263            do_study = 1;
2264            if (*pp == '+')
2265              {
2266              study_options |= PCRE_STUDY_JIT_COMPILE;
2267              pp++;
2268              }
2269            }
2270          else
2271            {
2272            do_study = 0;
2273            no_force_study = 1;
2274            }
2275          break;
2276    
2277        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2278          case 'W': options |= PCRE_UCP; break;
2279        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2280          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2281          case 'Z': debug_lengths = 0; break;
2282          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2283          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2284    
2285          case 'T':
2286          switch (*pp++)
2287            {
2288            case '0': tables = tables0; break;
2289            case '1': tables = tables1; break;
2290    
2291            case '\r':
2292            case '\n':
2293            case ' ':
2294            case 0:
2295            fprintf(outfile, "** Missing table number after /T\n");
2296            goto SKIP_DATA;
2297    
2298            default:
2299            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2300            goto SKIP_DATA;
2301            }
2302          break;
2303    
2304        case 'L':        case 'L':
2305        ppp = pp;        ppp = pp;
2306        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2307          /* The '0' test is just in case this is an unterminated line. */
2308          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2309        *ppp = 0;        *ppp = 0;
2310        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2311          {          {
2312          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2313          goto SKIP_DATA;          goto SKIP_DATA;
2314          }          }
2315          locale_set = 1;
2316        tables = pcre_maketables();        tables = pcre_maketables();
2317        pp = ppp;        pp = ppp;
2318        break;        break;
2319    
2320        case '\n': case ' ': break;        case '>':
2321          to_file = pp;
2322          while (*pp != 0) pp++;
2323          while (isspace(pp[-1])) pp--;
2324          *pp = 0;
2325          break;
2326    
2327          case '<':
2328            {
2329            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2330              {
2331              options |= PCRE_JAVASCRIPT_COMPAT;
2332              pp += 3;
2333              }
2334            else
2335              {
2336              int x = check_newline(pp, outfile);
2337              if (x == 0) goto SKIP_DATA;
2338              options |= x;
2339              while (*pp++ != '>');
2340              }
2341            }
2342          break;
2343    
2344          case '\r':                      /* So that it works in Windows */
2345          case '\n':
2346          case ' ':
2347          break;
2348    
2349        default:        default:
2350        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2351        goto SKIP_DATA;        goto SKIP_DATA;
# Line 502  while (!done) Line 2354  while (!done)
2354    
2355    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2356    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2357    local character tables. */    local character tables. Neither does it have 16-bit support. */
2358    
2359  #if !defined NOPOSIX  #if !defined NOPOSIX
2360    if (posix || do_posix)    if (posix || do_posix)
2361      {      {
2362      int rc;      int rc;
2363      int cflags = 0;      int cflags = 0;
2364    
2365      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2366      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2367        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2368        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2369        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2370        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2371        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2372    
2373        first_gotten_store = 0;
2374      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2375    
2376      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 518  while (!done) Line 2378  while (!done)
2378    
2379      if (rc != 0)      if (rc != 0)
2380        {        {
2381        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2382        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2383        goto SKIP_DATA;        goto SKIP_DATA;
2384        }        }
# Line 530  while (!done) Line 2390  while (!done)
2390  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2391    
2392      {      {
2393      if (timeit)      unsigned long int get_options;
2394    
2395        /* In 16-bit mode, convert the input. */
2396    
2397    #ifdef SUPPORT_PCRE16
2398        if (use_pcre16)
2399          {
2400          if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2401            {
2402            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2403              "converted to UTF-16\n");
2404            goto SKIP_DATA;
2405            }
2406          p = (pcre_uint8 *)buffer16;
2407          }
2408    #endif
2409    
2410        /* Compile many times when timing */
2411    
2412        if (timeit > 0)
2413        {        {
2414        register int i;        register int i;
2415        clock_t time_taken;        clock_t time_taken;
2416        clock_t start_time = clock();        clock_t start_time = clock();
2417        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2418          {          {
2419          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2420          if (re != NULL) free(re);          if (re != NULL) free(re);
2421          }          }
2422        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2423        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2424          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
2425          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
2426        }        }
2427    
2428      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2429        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2430    
2431      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2432      if non-interactive. */      if non-interactive. */
# Line 559  while (!done) Line 2439  while (!done)
2439          {          {
2440          for (;;)          for (;;)
2441            {            {
2442            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2443              {              {
2444              done = 1;              done = 1;
2445              goto CONTINUE;              goto CONTINUE;
# Line 573  while (!done) Line 2453  while (!done)
2453        goto CONTINUE;        goto CONTINUE;
2454        }        }
2455    
2456      /* Compilation succeeded; print data if required */      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2457        within the regex; check for this so that we know how to process the data
2458        lines. */
2459    
2460        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2461        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2462    
2463        /* Extract the size for possible writing before possibly flipping it,
2464        and remember the store that was got. */
2465    
2466        true_size = ((real_pcre *)re)->size;
2467        regex_gotten_store = first_gotten_store;
2468    
2469        /* Output code size information if requested */
2470    
2471        if (log_store)
2472          fprintf(outfile, "Memory allocation (code space): %d\n",
2473            (int)(first_gotten_store -
2474                  sizeof(real_pcre) -
2475                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2476    
2477        /* If -s or /S was present, study the regex to generate additional info to
2478        help with the matching, unless the pattern has the SS option, which
2479        suppresses the effect of /S (used for a few test patterns where studying is
2480        never sensible). */
2481    
2482        if (do_study || (force_study >= 0 && !no_force_study))
2483          {
2484          if (timeit > 0)
2485            {
2486            register int i;
2487            clock_t time_taken;
2488            clock_t start_time = clock();
2489            for (i = 0; i < timeit; i++)
2490              {
2491              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2492              }
2493            time_taken = clock() - start_time;
2494            if (extra != NULL)
2495              {
2496              PCRE_FREE_STUDY(extra);
2497              }
2498            fprintf(outfile, "  Study time %.4f milliseconds\n",
2499              (((double)time_taken * 1000.0) / (double)timeit) /
2500                (double)CLOCKS_PER_SEC);
2501            }
2502          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2503          if (error != NULL)
2504            fprintf(outfile, "Failed to study: %s\n", error);
2505          else if (extra != NULL)
2506            {
2507            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2508            if (log_store)
2509              {
2510              size_t jitsize;
2511              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2512              if (jitsize != 0)
2513                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2514              }
2515            }
2516          }
2517    
2518        /* If /K was present, we set up for handling MARK data. */
2519    
2520        if (do_mark)
2521          {
2522          if (extra == NULL)
2523            {
2524            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2525            extra->flags = 0;
2526            }
2527          extra->mark = &markptr;
2528          extra->flags |= PCRE_EXTRA_MARK;
2529          }
2530    
2531        /* Extract and display information from the compiled data if required. */
2532    
2533        SHOW_INFO:
2534    
2535        if (do_debug)
2536          {
2537          fprintf(outfile, "------------------------------------------------------------------\n");
2538    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2539          if (use_pcre16)
2540            pcre16_printint(re, outfile, debug_lengths);
2541          else
2542            pcre_printint(re, outfile, debug_lengths);
2543    #elif defined SUPPORT_PCRE8
2544          pcre_printint(re, outfile, debug_lengths);
2545    #else
2546          pcre16_printint(re, outfile, debug_lengths);
2547    #endif
2548          }
2549    
2550        /* We already have the options in get_options (see above) */
2551    
2552      if (do_showinfo)      if (do_showinfo)
2553        {        {
2554        int first_char, count;        unsigned long int all_options;
2555    #if !defined NOINFOCHECK
2556          int old_first_char, old_options, old_count;
2557    #endif
2558          int count, backrefmax, first_char, need_char, okpartial, jchanged,
2559            hascrorlf;
2560          int nameentrysize, namecount;
2561          const pcre_uchar *nametable;
2562    
2563          new_info(re, NULL, PCRE_INFO_SIZE, &size);
2564          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2565          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2566          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2567          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2568          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2569          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2570          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2571          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2572          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2573          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2574    
2575          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2576          that it gives the same results as the new function. */
2577    
2578    #if !defined NOINFOCHECK
2579          if (!use_pcre16)
2580            {
2581            old_count = pcre_info(re, &old_options, &old_first_char);
2582            if (count < 0) fprintf(outfile,
2583              "Error %d from pcre_info()\n", count);
2584            else
2585              {
2586              if (old_count != count) fprintf(outfile,
2587                "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2588                  old_count);
2589    
2590              if (old_first_char != first_char) fprintf(outfile,
2591                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2592                  first_char, old_first_char);
2593    
2594              if (old_options != (int)get_options) fprintf(outfile,
2595                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2596                  get_options, old_options);
2597              }
2598            }
2599    #endif
2600    
2601          if (size != regex_gotten_store) fprintf(outfile,
2602            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2603            (int)size, (int)regex_gotten_store);
2604    
2605          fprintf(outfile, "Capturing subpattern count = %d\n", count);
2606          if (backrefmax > 0)
2607            fprintf(outfile, "Max back reference = %d\n", backrefmax);
2608    
2609          if (namecount > 0)
2610            {
2611            fprintf(outfile, "Named capturing subpatterns:\n");
2612            while (namecount-- > 0)
2613              {
2614              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
2615                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2616                GET2(nametable, 0));
2617              nametable += nameentrysize;
2618              }
2619            }
2620    
2621          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2622          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2623    
2624          all_options = ((real_pcre *)re)->options;
2625          if (do_flip) all_options = swap_uint32(all_options);
2626    
2627          if (get_options == 0) fprintf(outfile, "No options\n");
2628            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2629              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2630              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2631              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2632              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2633              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2634              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2635              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2636              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2637              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2638              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2639              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2640              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2641              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2642              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2643              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2644              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2645              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2646    
2647          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2648    
2649          switch (get_options & PCRE_NEWLINE_BITS)
2650            {
2651            case PCRE_NEWLINE_CR:
2652            fprintf(outfile, "Forced newline sequence: CR\n");
2653            break;
2654    
2655            case PCRE_NEWLINE_LF:
2656            fprintf(outfile, "Forced newline sequence: LF\n");
2657            break;
2658    
2659            case PCRE_NEWLINE_CRLF:
2660            fprintf(outfile, "Forced newline sequence: CRLF\n");
2661            break;
2662    
2663            case PCRE_NEWLINE_ANYCRLF:
2664            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2665            break;
2666    
2667            case PCRE_NEWLINE_ANY:
2668            fprintf(outfile, "Forced newline sequence: ANY\n");
2669            break;
2670    
2671        if (do_debug) print_internals(re);          default:
2672            break;
2673            }
2674    
2675        count = pcre_info(re, &options, &first_char);        if (first_char == -1)
2676        if (count < 0) fprintf(outfile,          {
2677          "Error %d while reading info\n", count);          fprintf(outfile, "First char at start or follows newline\n");
2678            }
2679          else if (first_char < 0)
2680            {
2681            fprintf(outfile, "No first char\n");
2682            }
2683        else        else
2684          {          {
2685          fprintf(outfile, "Identifying subpattern count = %d\n", count);          const char *caseless =
2686          if (options == 0) fprintf(outfile, "No options\n");            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2687            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",            "" : " (caseless)";
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
   
         if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)  
           fprintf(outfile, "Case state changes\n");  
2688    
2689          if (first_char == -1)          if (PRINTOK(first_char))
2690              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2691            else
2692            {            {
2693            fprintf(outfile, "First char at start or follows \\n\n");            fprintf(outfile, "First char = ");
2694              pchar(first_char, outfile);
2695              fprintf(outfile, "%s\n", caseless);
2696            }            }
2697          else if (first_char < 0)          }
2698    
2699          if (need_char < 0)
2700            {
2701            fprintf(outfile, "No need char\n");
2702            }
2703          else
2704            {
2705            const char *caseless =
2706              ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2707              "" : " (caseless)";
2708    
2709            if (PRINTOK(need_char))
2710              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2711            else
2712            {            {
2713            fprintf(outfile, "No first char\n");            fprintf(outfile, "Need char = ");
2714              pchar(need_char, outfile);
2715              fprintf(outfile, "%s\n", caseless);
2716            }            }
2717            }
2718    
2719          /* Don't output study size; at present it is in any case a fixed
2720          value, but it varies, depending on the computer architecture, and
2721          so messes up the test suite. (And with the /F option, it might be
2722          flipped.) If study was forced by an external -s, don't show this
2723          information unless -i or -d was also present. This means that, except
2724          when auto-callouts are involved, the output from runs with and without
2725          -s should be identical. */
2726    
2727          if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2728            {
2729            if (extra == NULL)
2730              fprintf(outfile, "Study returned NULL\n");
2731          else          else
2732            {            {
2733            if (isprint(first_char))            pcre_uint8 *start_bits = NULL;
2734              fprintf(outfile, "First char = \'%c\'\n", first_char);            int minlength;
2735    
2736              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2737              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2738    
2739              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2740              if (start_bits == NULL)
2741                fprintf(outfile, "No set of starting bytes\n");
2742            else            else
2743              fprintf(outfile, "First char = %d\n", first_char);              {
2744                int i;
2745                int c = 24;
2746                fprintf(outfile, "Starting byte set: ");
2747                for (i = 0; i < 256; i++)
2748                  {
2749                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
2750                    {
2751                    if (c > 75)
2752                      {
2753                      fprintf(outfile, "\n  ");
2754                      c = 2;
2755                      }
2756                    if (PRINTOK(i) && i != ' ')
2757                      {
2758                      fprintf(outfile, "%c ", i);
2759                      c += 2;
2760                      }
2761                    else
2762                      {
2763                      fprintf(outfile, "\\x%02x ", i);
2764                      c += 5;
2765                      }
2766                    }
2767                  }
2768                fprintf(outfile, "\n");
2769                }
2770            }            }
2771    
2772          if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)          /* Show this only if the JIT was set by /S, not by -s. */
2773    
2774            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2775            {            {
2776            int req_char = ((real_pcre *)re)->req_char;            int jit;
2777            if (isprint(req_char))            new_info(re, extra, PCRE_INFO_JIT, &jit);
2778              fprintf(outfile, "Req char = \'%c\'\n", req_char);            if (jit)
2779                fprintf(outfile, "JIT study was successful\n");
2780            else            else
2781              fprintf(outfile, "Req char = %d\n", req_char);  #ifdef SUPPORT_JIT
2782                fprintf(outfile, "JIT study was not successful\n");
2783    #else
2784                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2785    #endif
2786            }            }
         else fprintf(outfile, "No req char\n");  
2787          }          }
2788        }        }
2789    
2790      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
2791      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
2792        the study length, in big-endian order. */
2793    
2794      if (do_study)      if (to_file != NULL)
2795        {        {
2796        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
2797          if (f == NULL)
2798          {          {
2799          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
2800          }          }
2801          else
2802            {
2803            pcre_uint8 sbuf[8];
2804    
2805        extra = pcre_study(re, study_options, &error);          if (do_flip) regexflip(re, extra);
2806        if (error != NULL)          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2807          fprintf(outfile, "Failed to study: %s\n", error);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2808        else if (extra == NULL)          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2809          fprintf(outfile, "Study returned NULL\n");          sbuf[3] = (pcre_uint8)((true_size) & 255);
2810            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2811        /* This looks at internal information. A bit kludgy to do it this          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2812        way, but it is useful for testing. */          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2813            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2814    
2815        else if (do_showinfo)          if (fwrite(sbuf, 1, 8, f) < 8 ||
2816          {              fwrite(re, 1, true_size, f) < true_size)
2817          real_pcre_extra *xx = (real_pcre_extra *)extra;            {
2818          if ((xx->options & PCRE_STUDY_MAPPED) == 0)            fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2819            fprintf(outfile, "No starting character set\n");            }
2820          else          else
2821            {            {
2822            int i;            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2823            int c = 24;  
2824            fprintf(outfile, "Starting character set: ");            /* If there is study data, write it. */
2825            for (i = 0; i < 256; i++)  
2826              if (extra != NULL)
2827              {              {
2828              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
2829                    true_study_size)
2830                {                {
2831                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
2832                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
2833                }                }
2834                else fprintf(outfile, "Study data written to %s\n", to_file);
2835              }              }
           fprintf(outfile, "\n");  
2836            }            }
2837            fclose(f);
2838            }
2839    
2840          new_free(re);
2841          if (extra != NULL)
2842            {
2843            PCRE_FREE_STUDY(extra);
2844            }
2845          if (locale_set)
2846            {
2847            new_free((void *)tables);
2848            setlocale(LC_CTYPE, "C");
2849            locale_set = 0;
2850          }          }
2851          continue;  /* With next regex */
2852        }        }
2853      }      }        /* End of non-POSIX compile */
2854    
2855    /* Read data lines and test them */    /* Read data lines and test them */
2856    
2857    for (;;)    for (;;)
2858      {      {
2859      unsigned char *q;      pcre_uint8 *q;
2860      unsigned char *bptr = dbuffer;      pcre_uint8 *bptr;
2861        int *use_offsets = offsets;
2862        int use_size_offsets = size_offsets;
2863        int callout_data = 0;
2864        int callout_data_set = 0;
2865      int count, c;      int count, c;
2866      int copystrings = 0;      int copystrings = 0;
2867        int find_match_limit = default_find_match_limit;
2868      int getstrings = 0;      int getstrings = 0;
2869      int getlist = 0;      int getlist = 0;
2870      int gmatched = 0;      int gmatched = 0;
2871      int start_offset = 0;      int start_offset = 0;
2872      int offsets[45];      int start_offset_sign = 1;
2873      int size_offsets = sizeof(offsets)/sizeof(int);      int g_notempty = 0;
2874        int use_dfa = 0;
2875    
2876      options = 0;      options = 0;
2877    
2878      if (infile == stdin) printf("data> ");      *copynames = 0;
2879      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
2880    
2881        copynamesptr = copynames;
2882        getnamesptr = getnames;
2883    
2884        pcre_callout = callout;
2885        first_callout = 1;
2886        last_callout_mark = NULL;
2887        callout_extra = 0;
2888        callout_count = 0;
2889        callout_fail_count = 999999;
2890        callout_fail_id = -1;
2891        show_malloc = 0;
2892    
2893        if (extra != NULL) extra->flags &=
2894          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2895    
2896        len = 0;
2897        for (;;)
2898        {        {
2899        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2900        goto CONTINUE;          {
2901            if (len > 0)    /* Reached EOF without hitting a newline */
2902              {
2903              fprintf(outfile, "\n");
2904              break;
2905              }
2906            done = 1;
2907            goto CONTINUE;
2908            }
2909          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2910          len = (int)strlen((char *)buffer);
2911          if (buffer[len-1] == '\n') break;
2912        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2913    
     len = (int)strlen((char *)buffer);  
2914      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2915      buffer[len] = 0;      buffer[len] = 0;
2916      if (len == 0) break;      if (len == 0) break;
# Line 727  while (!done) Line 2918  while (!done)
2918      p = buffer;      p = buffer;
2919      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2920    
2921      q = dbuffer;      bptr = q = dbuffer;
2922      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2923        {        {
2924        int i = 0;        int i = 0;
2925        int n = 0;        int n = 0;
2926    
2927        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
2928          {          {
2929          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 748  while (!done) Line 2940  while (!done)
2940          c -= '0';          c -= '0';
2941          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2942            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2943    
2944    #if !defined NOUTF8
2945            if (use_utf && c > 255)
2946              {
2947              pcre_uint8 buff8[8];
2948              int ii, utn;
2949              utn = ord2utf8(c, buff8);
2950              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2951              c = buff8[ii];   /* Last byte */
2952              }
2953    #endif
2954          break;          break;
2955    
2956          case 'x':          case 'x':
2957    
2958            /* Handle \x{..} specially - new Perl thing for utf8 */
2959    
2960    #if !defined NOUTF8
2961            if (*p == '{')
2962              {
2963              pcre_uint8 *pt = p;
2964              c = 0;
2965    
2966              /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2967              when isxdigit() is a macro that refers to its argument more than
2968              once. This is banned by the C Standard, but apparently happens in at
2969              least one MacOS environment. */
2970    
2971              for (pt++; isxdigit(*pt); pt++)
2972                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2973              if (*pt == '}')
2974                {
2975                pcre_uint8 buff8[8];
2976                int ii, utn;
2977                if (use_utf)
2978                  {
2979                  utn = ord2utf8(c, buff8);
2980                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2981                  c = buff8[ii];   /* Last byte */
2982                  }
2983                else
2984                 {
2985                 if (c > 255)
2986                   {
2987                   if (use_pcre16)
2988                     fprintf(outfile, "** Character \\x{%x} is greater than 255.\n"
2989                       "** Because its input is first processed as 8-bit, pcretest "
2990                       "does not\n** support such characters in 16-bit mode when "
2991                       "UTF-16 is not set.\n", c);
2992                   else
2993                     fprintf(outfile, "** Character \\x{%x} is greater than 255 "
2994                       "and UTF-8 mode is not enabled.\n", c);
2995    
2996                   fprintf(outfile, "** Truncation will probably give the wrong "
2997                     "result.\n");
2998                   }
2999                 }
3000                p = pt + 1;
3001                break;
3002                }
3003              /* Not correct form; fall through */
3004              }
3005    #endif
3006    
3007            /* Ordinary \x */
3008    
3009          c = 0;          c = 0;
3010          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3011            {            {
3012            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3013            p++;            p++;
3014            }            }
3015          break;          break;
3016    
3017          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
3018          p--;          p--;
3019          continue;          continue;
3020    
3021            case '>':
3022            if (*p == '-')
3023              {
3024              start_offset_sign = -1;
3025              p++;
3026              }
3027            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3028            start_offset *= start_offset_sign;
3029            continue;
3030    
3031          case 'A':  /* Option setting */          case 'A':  /* Option setting */
3032          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
3033          continue;          continue;
# Line 772  while (!done) Line 3037  while (!done)
3037          continue;          continue;
3038    
3039          case 'C':          case 'C':
3040          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
3041          copystrings |= 1 << n;            {
3042              while(isdigit(*p)) n = n * 10 + *p++ - '0';
3043              copystrings |= 1 << n;
3044              }
3045            else if (isalnum(*p))
3046              {
3047              pcre_uchar *npp = copynamesptr;
3048              while (isalnum(*p)) *npp++ = *p++;
3049              *npp++ = 0;
3050              *npp = 0;
3051              n = pcre_get_stringnumber(re, (char *)copynamesptr);
3052              if (n < 0)
3053                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
3054              copynamesptr = npp;
3055              }
3056            else if (*p == '+')
3057              {
3058              callout_extra = 1;
3059              p++;
3060              }
3061            else if (*p == '-')
3062              {
3063              pcre_callout = NULL;
3064              p++;
3065              }
3066            else if (*p == '!')
3067              {
3068              callout_fail_id = 0;
3069              p++;
3070              while(isdigit(*p))
3071                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
3072              callout_fail_count = 0;
3073              if (*p == '!')
3074                {
3075                p++;
3076                while(isdigit(*p))
3077                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
3078                }
3079              }
3080            else if (*p == '*')
3081              {
3082              int sign = 1;
3083              callout_data = 0;
3084              if (*(++p) == '-') { sign = -1; p++; }
3085              while(isdigit(*p))
3086                callout_data = callout_data * 10 + *p++ - '0';
3087              callout_data *= sign;
3088              callout_data_set = 1;
3089              }
3090            continue;
3091    
3092    #if !defined NODFA
3093            case 'D':
3094    #if !defined NOPOSIX
3095            if (posix || do_posix)
3096              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
3097            else
3098    #endif
3099              use_dfa = 1;
3100            continue;
3101    #endif
3102    
3103    #if !defined NODFA
3104            case 'F':
3105            options |= PCRE_DFA_SHORTEST;
3106          continue;          continue;
3107    #endif
3108    
3109          case 'G':          case 'G':
3110            if (isdigit(*p))
3111              {
3112              while(isdigit(*p)) n = n * 10 + *p++ - '0';
3113              getstrings |= 1 << n;
3114              }
3115            else if (isalnum(*p))
3116              {
3117              pcre_uchar *npp = getnamesptr;
3118              while (isalnum(*p)) *npp++ = *p++;
3119              *npp++ = 0;
3120              *npp = 0;
3121              n = pcre_get_stringnumber(re, (char *)getnamesptr);
3122              if (n < 0)
3123                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
3124              getnamesptr = npp;
3125              }
3126            continue;
3127    
3128            case 'J':
3129          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3130          getstrings |= 1 << n;          if (extra != NULL
3131                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3132                && extra->executable_jit != NULL)
3133              {
3134              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3135              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
3136              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
3137              }
3138          continue;          continue;
3139    
3140          case 'L':          case 'L':
3141          getlist = 1;          getlist = 1;
3142          continue;          continue;
3143    
3144            case 'M':
3145            find_match_limit = 1;
3146            continue;
3147    
3148          case 'N':          case 'N':
3149          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3150              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3151            else
3152              options |= PCRE_NOTEMPTY;
3153          continue;          continue;
3154    
3155          case 'O':          case 'O':
3156          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3157          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
3158              {
3159              size_offsets_max = n;
3160              free(offsets);
3161              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
3162              if (offsets == NULL)
3163                {
3164                printf("** Failed to get %d bytes of memory for offsets vector\n",
3165                  (int)(size_offsets_max * sizeof(int)));
3166                yield = 1;
3167                goto EXIT;
3168                }
3169              }
3170            use_size_offsets = n;
3171            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
3172            continue;
3173    
3174            case 'P':
3175            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3176              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3177            continue;
3178    
3179            case 'Q':
3180            while(isdigit(*p)) n = n * 10 + *p++ - '0';
3181            if (extra == NULL)
3182              {
3183              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3184              extra->flags = 0;
3185              }
3186            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
3187            extra->match_limit_recursion = n;
3188            continue;
3189    
3190            case 'q':
3191            while(isdigit(*p)) n = n * 10 + *p++ - '0';
3192            if (extra == NULL)
3193              {
3194              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3195              extra->flags = 0;
3196              }
3197            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
3198            extra->match_limit = n;
3199            continue;
3200    
3201    #if !defined NODFA
3202            case 'R':
3203            options |= PCRE_DFA_RESTART;
3204            continue;
3205    #endif
3206    
3207            case 'S':
3208            show_malloc = 1;
3209            continue;
3210    
3211            case 'Y':
3212            options |= PCRE_NO_START_OPTIMIZE;
3213          continue;          continue;
3214    
3215          case 'Z':          case 'Z':
3216          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3217          continue;          continue;
3218    
3219            case '?':
3220            options |= PCRE_NO_UTF8_CHECK;
3221            continue;
3222    
3223            case '<':
3224              {
3225              int x = check_newline(p, outfile);
3226              if (x == 0) goto NEXT_DATA;
3227              options |= x;
3228              while (*p++ != '>');
3229              }
3230            continue;
3231          }          }
3232        *q++ = c;        *q++ = c;
3233        }        }
3234      *q = 0;      *q = 0;
3235      len = q - dbuffer;      len = (int)(q - dbuffer);
3236    
3237        /* Move the data to the end of the buffer so that a read over the end of
3238        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3239        we are using the POSIX interface, we must include the terminating zero. */
3240    
3241    #if !defined NOPOSIX
3242        if (posix || do_posix)
3243          {
3244          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3245          bptr += buffer_size - len - 1;
3246          }
3247        else
3248    #endif
3249          {
3250          memmove(bptr + buffer_size - len, bptr, len);
3251          bptr += buffer_size - len;
3252          }
3253    
3254        if ((all_use_dfa || use_dfa) && find_match_limit)
3255          {
3256          printf("**Match limit not relevant for DFA matching: ignored\n");
3257          find_match_limit = 0;
3258          }
3259    
3260      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
3261      support timing. */      support timing or playing with the match limit or callout data. */
3262    
3263  #if !defined NOPOSIX  #if !defined NOPOSIX
3264      if (posix || do_posix)      if (posix || do_posix)
3265        {        {
3266        int rc;        int rc;
3267        int eflags = 0;        int eflags = 0;
3268        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
3269          if (use_size_offsets > 0)
3270            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3271        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3272        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3273          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3274    
3275        rc = regexec(&preg, (const char *)bptr,        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);  
3276    
3277        if (rc != 0)        if (rc != 0)
3278          {          {
3279          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3280          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3281          }          }
3282          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3283                  != 0)
3284            {
3285            fprintf(outfile, "Matched with REG_NOSUB\n");
3286            }
3287        else        else
3288          {          {
3289          size_t i;          size_t i;
3290          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
3291            {            {
3292            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3293              {              {
3294              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3295              pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3296                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3297              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3298              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3299                {                {
3300                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3301                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3302                    outfile);
3303                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3304                }                }
3305              }              }
3306            }            }
3307          }          }
3308          free(pmatch);
3309          goto NEXT_DATA;
3310        }        }
3311    
3312    #endif  /* !defined NOPOSIX */
3313    
3314      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3315    
3316      else  #ifdef SUPPORT_PCRE16
3317  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3318          {
3319          len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3320          if (len < 0)
3321            {
3322            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3323              "converted to UTF-16\n");