/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 55 by nigel, Sat Feb 24 21:39:46 2007 UTC code/branches/pcre16/pcretest.c revision 810 by ph10, Mon Dec 19 13:34:10 2011 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10    -----------------------------------------------------------------------------
11    Redistribution and use in source and binary forms, with or without
12    modification, are permitted provided that the following conditions are met:
13    
14        * Redistributions of source code must retain the above copyright notice,
15          this list of conditions and the following disclaimer.
16    
17        * Redistributions in binary form must reproduce the above copyright
18          notice, this list of conditions and the following disclaimer in the
19          documentation and/or other materials provided with the distribution.
20    
21        * Neither the name of the University of Cambridge nor the names of its
22          contributors may be used to endorse or promote products derived from
23          this software without specific prior written permission.
24    
25    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35    POSSIBILITY OF SUCH DAMAGE.
36    -----------------------------------------------------------------------------
37    */
38    
39    
40    #ifdef HAVE_CONFIG_H
41    #include "config.h"
42    #endif
43    
44  #include <ctype.h>  #include <ctype.h>
45  #include <stdio.h>  #include <stdio.h>
46  #include <string.h>  #include <string.h>
47  #include <stdlib.h>  #include <stdlib.h>
48  #include <time.h>  #include <time.h>
49  #include <locale.h>  #include <locale.h>
50    #include <errno.h>
51    
52    #ifdef SUPPORT_LIBREADLINE
53    #ifdef HAVE_UNISTD_H
54    #include <unistd.h>
55    #endif
56    #include <readline/readline.h>
57    #include <readline/history.h>
58    #endif
59    
60    
61    /* A number of things vary for Windows builds. Originally, pcretest opened its
62    input and output without "b"; then I was told that "b" was needed in some
63    environments, so it was added for release 5.0 to both the input and output. (It
64    makes no difference on Unix-like systems.) Later I was told that it is wrong
65    for the input on Windows. I've now abstracted the modes into two macros that
66    are set here, to make it easier to fiddle with them, and removed "b" from the
67    input mode under Windows. */
68    
69    #if defined(_WIN32) || defined(WIN32)
70    #include <io.h>                /* For _setmode() */
71    #include <fcntl.h>             /* For _O_BINARY */
72    #define INPUT_MODE   "r"
73    #define OUTPUT_MODE  "wb"
74    
75    #ifndef isatty
76    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
77    #endif                         /* though in some environments they seem to   */
78                                   /* be already defined, hence the #ifndefs.    */
79    #ifndef fileno
80    #define fileno _fileno
81    #endif
82    
83  /* Use the internal info for displaying the results of pcre_study(). */  /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85    #ifdef __BORLANDC__
86    #define _setmode(handle, mode) setmode(handle, mode)
87    #endif
88    
89    /* Not Windows */
90    
91    #else
92    #include <sys/time.h>          /* These two includes are needed */
93    #include <sys/resource.h>      /* for setrlimit(). */
94    #define INPUT_MODE   "rb"
95    #define OUTPUT_MODE  "wb"
96    #endif
97    
98    
99    /* We have to include pcre_internal.h because we need the internal info for
100    displaying the results of pcre_study() and we also need to know about the
101    internal macros, structures, and other internal data values; pcretest has
102    "inside information" compared to a program that strictly follows the PCRE API.
103    
104    Although pcre_internal.h does itself include pcre.h, we explicitly include it
105    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106    appropriately for an application, not for building PCRE. */
107    
108    #include "pcre.h"
109    #include "pcre_internal.h"
110    
111    /* The pcre_printint() function, which prints the internal form of a compiled
112    regex, is held in a separate file so that (a) it can be compiled in either
113    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114    when that is compiled in debug mode. */
115    
116    #ifdef SUPPORT_PCRE8
117    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118    #endif
119    #ifdef SUPPORT_PCRE16
120    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121    #endif
122    
123  #include "internal.h"  /* We need access to some of the data tables that PCRE uses. So as not to have
124    to keep two copies, we include the source file here, changing the names of the
125    external symbols to prevent clashes. */
126    
127    #define _pcre_ucp_gentype      ucp_gentype
128    #define _pcre_ucp_typerange    ucp_typerange
129    #define _pcre_utf8_table1      utf8_table1
130    #define _pcre_utf8_table1_size utf8_table1_size
131    #define _pcre_utf8_table2      utf8_table2
132    #define _pcre_utf8_table3      utf8_table3
133    #define _pcre_utf8_table4      utf8_table4
134    #define _pcre_utt              utt
135    #define _pcre_utt_size         utt_size
136    #define _pcre_utt_names        utt_names
137    #define _pcre_OP_lengths       OP_lengths
138    
139    #include "pcre_tables.c"
140    
141    /* The definition of the macro PRINTABLE, which determines whether to print an
142    output character as-is or as a hex value when showing compiled patterns, is
143    the same as in the printint.src file. We use it here in cases when the locale
144    has not been explicitly changed, so as to get consistent output from systems
145    that differ in their output from isprint() even in the "C" locale. */
146    
147    #ifdef EBCDIC
148    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149    #else
150    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151    #endif
152    
153    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
156  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 160  Makefile. */
160  #include "pcreposix.h"  #include "pcreposix.h"
161  #endif  #endif
162    
163    /* It is also possible, originally for the benefit of a version that was
164    imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165    without the interface to the DFA matcher (NODFA), and without the doublecheck
166    of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167    out the UTF8 support if PCRE is built without it. */
168    
169    #ifndef SUPPORT_UTF8
170    #ifndef NOUTF8
171    #define NOUTF8
172    #endif
173    #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    #define PCHARS8(lv, p, len, f) \
185      lv = pchars((pcre_uint8 *)p, len, f)
186    
187    #define PCHARSV8(p, len, f) \
188      (void)pchars((pcre_uint8 *)p, len, f)
189    
190    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191      re = pcre_compile((char *)pat, options, error, erroffset, tables)
192    
193    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194        offsets, size_offsets) \
195      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196        offsets, size_offsets)
197    
198    #define PCRE_STUDY8(extra, re, options, error) \
199      extra = pcre_study(re, options, error)
200    
201    #define PCRE_FREE_STUDY8(extra) \
202      pcre_free_study(extra)
203    
204    #endif /* SUPPORT_PCRE8 */
205    
206    
207    #ifdef SUPPORT_PCRE16
208    #define PCHARS16(lv, p, len, f) \
209      lv = pchars16((PCRE_SPTR16)p, len, f)
210    
211    #define PCHARSV16(p, len, f) \
212      (void)pchars16((PCRE_SPTR16)p, len, f)
213    
214    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
215      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
216    
217    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
218        offsets, size_offsets) \
219      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
220        options, offsets, size_offsets)
221    
222    #define PCRE_FREE_STUDY16(extra) \
223      pcre16_free_study(extra)
224    
225    #define PCRE_STUDY16(extra, re, options, error) \
226      extra = pcre16_study(re, options, error)
227    
228    #endif /* SUPPORT_PCRE16 */
229    
230    
231    /* ----- Both modes are supported; a runtime test is needed ----- */
232    
233    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
234    
235    #define PCHARS(lv, p, len, f) \
236      if (use_pcre16) \
237        PCHARS16(lv, p, len, f); \
238      else \
239        PCHARS8(lv, p, len, f)
240    
241    #define PCHARSV(p, len, f) \
242      if (use_pcre16) \
243        PCHARSV16(p, len, f); \
244      else \
245        PCHARSV8(p, len, f)
246    
247    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
248      if (use_pcre16) \
249        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
250      else \
251        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
252    
253    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
254        offsets, size_offsets) \
255      if (use_pcre16) \
256        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
257          offsets, size_offsets); \
258      else \
259        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
260          offsets, size_offsets)
261    
262    #define PCRE_FREE_STUDY(extra) \
263      if (use_pcre16) \
264        PCRE_FREE_STUDY16(extra); \
265      else \
266        PCRE_FREE_STUDY8(extra)
267    
268    #define PCRE_STUDY(extra, re, options, error) \
269      if (use_pcre16) \
270        PCRE_STUDY16(extra, re, options, error); \
271      else \
272        PCRE_STUDY8(extra, re, options, error)
273    
274    /* ----- Only 8-bit mode is supported ----- */
275    
276    #elif defined SUPPORT_PCRE8
277    #define PCHARS           PCHARS8
278    #define PCHARSV          PCHARSV8
279    #define PCRE_COMPILE     PCRE_COMPILE8
280    #define PCRE_EXEC        PCRE_EXEC8
281    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
282    #define PCRE_STUDY       PCRE_STUDY8
283    
284    /* ----- Only 16-bit mode is supported ----- */
285    
286    #else
287    #define PCHARS           PCHARS16
288    #define PCHARSV          PCHARSV16
289    #define PCRE_COMPILE     PCRE_COMPILE16
290    #define PCRE_EXEC        PCRE_EXEC16
291    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
292    #define PCRE_STUDY       PCRE_STUDY16
293    #endif
294    
295    /* ----- End of mode-specific function call macros ----- */
296    
297    
298    /* Other parameters */
299    
300  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
301  #ifdef CLK_TCK  #ifdef CLK_TCK
302  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 305  Makefile. */
305  #endif  #endif
306  #endif  #endif
307    
308  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
309    
310    #define LOOPREPEAT 500000
311    
312    /* Static variables */
313    
314  static FILE *outfile;  static FILE *outfile;
315  static int log_store = 0;  static int log_store = 0;
316    static int callout_count;
317    static int callout_extra;
318    static int callout_fail_count;
319    static int callout_fail_id;
320    static int debug_lengths;
321    static int first_callout;
322    static int locale_set = 0;
323    static int show_malloc;
324    static int use_utf;
325  static size_t gotten_store;  static size_t gotten_store;
326    static size_t first_gotten_store = 0;
327    static const unsigned char *last_callout_mark = NULL;
328    
329    /* The buffers grow automatically if very long input lines are encountered. */
330    
331    static int buffer_size = 50000;
332    static pcre_uint8 *buffer = NULL;
333    static pcre_uint8 *dbuffer = NULL;
334    static pcre_uint8 *pbuffer = NULL;
335    
336    #ifdef SUPPORT_PCRE16
337    static int buffer16_size = 0;
338    static pcre_uint16 *buffer16 = NULL;
339    #endif
340    
341  static int utf8_table1[] = {  /* If we have 8-bit support, default use_pcre16 to false; if there is also
342    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  16-bit support, it can be changed by an option. If there is no 8-bit support,
343    there must be 16-bit support, so default it to 1. */
344    
345  static int utf8_table2[] = {  #ifdef SUPPORT_PCRE8
346    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  static int use_pcre16 = 0;
347    #else
348    static int use_pcre16 = 1;
349    #endif
350    
351  static int utf8_table3[] = {  /* Textual explanations for runtime error codes */
352    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
353    static const char *errtexts[] = {
354      NULL,  /* 0 is no error */
355      NULL,  /* NOMATCH is handled specially */
356      "NULL argument passed",
357      "bad option value",
358      "magic number missing",
359      "unknown opcode - pattern overwritten?",
360      "no more memory",
361      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
362      "match limit exceeded",
363      "callout error code",
364      NULL,  /* BADUTF8 is handled specially */
365      "bad UTF-8 offset",
366      NULL,  /* PARTIAL is handled specially */
367      "not used - internal error",
368      "internal error - pattern overwritten?",
369      "bad count value",
370      "item unsupported for DFA matching",
371      "backreference condition or recursion test not supported for DFA matching",
372      "match limit not supported for DFA matching",
373      "workspace size exceeded in DFA matching",
374      "too much recursion for DFA matching",
375      "recursion limit exceeded",
376      "not used - internal error",
377      "invalid combination of newline options",
378      "bad offset value",
379      NULL,  /* SHORTUTF8 is handled specially */
380      "nested recursion at the same subject position",
381      "JIT stack limit reached",
382      "pattern compiled in wrong mode (8-bit/16-bit error)"
383    };
384    
385    
386  /*************************************************  /*************************************************
387  *       Convert character value to UTF-8         *  *         Alternate character tables             *
388  *************************************************/  *************************************************/
389    
390  /* This function takes an integer value in the range 0 - 0x7fffffff  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
391  and encodes it as a UTF-8 character in 0 to 6 bytes.  using the default tables of the library. However, the T option can be used to
392    select alternate sets of tables, for different kinds of testing. Note that
393    the L (locale) option also adjusts the tables. */
394    
395    /* This is the set of tables distributed as default with PCRE. It recognizes
396    only ASCII characters. */
397    
398    static const pcre_uint8 tables0[] = {
399    
400    /* This table is a lower casing table. */
401    
402        0,  1,  2,  3,  4,  5,  6,  7,
403        8,  9, 10, 11, 12, 13, 14, 15,
404       16, 17, 18, 19, 20, 21, 22, 23,
405       24, 25, 26, 27, 28, 29, 30, 31,
406       32, 33, 34, 35, 36, 37, 38, 39,
407       40, 41, 42, 43, 44, 45, 46, 47,
408       48, 49, 50, 51, 52, 53, 54, 55,
409       56, 57, 58, 59, 60, 61, 62, 63,
410       64, 97, 98, 99,100,101,102,103,
411      104,105,106,107,108,109,110,111,
412      112,113,114,115,116,117,118,119,
413      120,121,122, 91, 92, 93, 94, 95,
414       96, 97, 98, 99,100,101,102,103,
415      104,105,106,107,108,109,110,111,
416      112,113,114,115,116,117,118,119,
417      120,121,122,123,124,125,126,127,
418      128,129,130,131,132,133,134,135,
419      136,137,138,139,140,141,142,143,
420      144,145,146,147,148,149,150,151,
421      152,153,154,155,156,157,158,159,
422      160,161,162,163,164,165,166,167,
423      168,169,170,171,172,173,174,175,
424      176,177,178,179,180,181,182,183,
425      184,185,186,187,188,189,190,191,
426      192,193,194,195,196,197,198,199,
427      200,201,202,203,204,205,206,207,
428      208,209,210,211,212,213,214,215,
429      216,217,218,219,220,221,222,223,
430      224,225,226,227,228,229,230,231,
431      232,233,234,235,236,237,238,239,
432      240,241,242,243,244,245,246,247,
433      248,249,250,251,252,253,254,255,
434    
435    /* This table is a case flipping table. */
436    
437        0,  1,  2,  3,  4,  5,  6,  7,
438        8,  9, 10, 11, 12, 13, 14, 15,
439       16, 17, 18, 19, 20, 21, 22, 23,
440       24, 25, 26, 27, 28, 29, 30, 31,
441       32, 33, 34, 35, 36, 37, 38, 39,
442       40, 41, 42, 43, 44, 45, 46, 47,
443       48, 49, 50, 51, 52, 53, 54, 55,
444       56, 57, 58, 59, 60, 61, 62, 63,
445       64, 97, 98, 99,100,101,102,103,
446      104,105,106,107,108,109,110,111,
447      112,113,114,115,116,117,118,119,
448      120,121,122, 91, 92, 93, 94, 95,
449       96, 65, 66, 67, 68, 69, 70, 71,
450       72, 73, 74, 75, 76, 77, 78, 79,
451       80, 81, 82, 83, 84, 85, 86, 87,
452       88, 89, 90,123,124,125,126,127,
453      128,129,130,131,132,133,134,135,
454      136,137,138,139,140,141,142,143,
455      144,145,146,147,148,149,150,151,
456      152,153,154,155,156,157,158,159,
457      160,161,162,163,164,165,166,167,
458      168,169,170,171,172,173,174,175,
459      176,177,178,179,180,181,182,183,
460      184,185,186,187,188,189,190,191,
461      192,193,194,195,196,197,198,199,
462      200,201,202,203,204,205,206,207,
463      208,209,210,211,212,213,214,215,
464      216,217,218,219,220,221,222,223,
465      224,225,226,227,228,229,230,231,
466      232,233,234,235,236,237,238,239,
467      240,241,242,243,244,245,246,247,
468      248,249,250,251,252,253,254,255,
469    
470    /* This table contains bit maps for various character classes. Each map is 32
471    bytes long and the bits run from the least significant end of each byte. The
472    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
473    graph, print, punct, and cntrl. Other classes are built from combinations. */
474    
475      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
476      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
477      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
478      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
479    
480      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
481      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
482      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
483      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
484    
485      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
486      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
487      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
488      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
489    
490      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
492      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
494    
495      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
496      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
497      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
499    
500      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
501      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
502      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
504    
505      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
506      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
507      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
509    
510      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
511      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
512      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
514    
515      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
516      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
517      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519    
520      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
521      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
522      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
524    
525    /* This table identifies various classes of character by individual bits:
526      0x01   white space character
527      0x02   letter
528      0x04   decimal digit
529      0x08   hexadecimal digit
530      0x10   alphanumeric or '_'
531      0x80   regular expression metacharacter or binary zero
532    */
533    
534  Arguments:    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
535    cvalue     the character value    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
536    buffer     pointer to buffer for result - at least 6 bytes long    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
537      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
538      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
539      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
540      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
541      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
542      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
543      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
544      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
545      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
546      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
547      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
548      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
549      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
550      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
551      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
552      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
553      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
554      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
555      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
556      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
557      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
558      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
559      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
560      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
561      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
562      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
563      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
564      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
565      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
566    
567    /* This is a set of tables that came originally from a Windows user. It seems to
568    be at least an approximation of ISO 8859. In particular, there are characters
569    greater than 128 that are marked as spaces, letters, etc. */
570    
571    static const pcre_uint8 tables1[] = {
572    0,1,2,3,4,5,6,7,
573    8,9,10,11,12,13,14,15,
574    16,17,18,19,20,21,22,23,
575    24,25,26,27,28,29,30,31,
576    32,33,34,35,36,37,38,39,
577    40,41,42,43,44,45,46,47,
578    48,49,50,51,52,53,54,55,
579    56,57,58,59,60,61,62,63,
580    64,97,98,99,100,101,102,103,
581    104,105,106,107,108,109,110,111,
582    112,113,114,115,116,117,118,119,
583    120,121,122,91,92,93,94,95,
584    96,97,98,99,100,101,102,103,
585    104,105,106,107,108,109,110,111,
586    112,113,114,115,116,117,118,119,
587    120,121,122,123,124,125,126,127,
588    128,129,130,131,132,133,134,135,
589    136,137,138,139,140,141,142,143,
590    144,145,146,147,148,149,150,151,
591    152,153,154,155,156,157,158,159,
592    160,161,162,163,164,165,166,167,
593    168,169,170,171,172,173,174,175,
594    176,177,178,179,180,181,182,183,
595    184,185,186,187,188,189,190,191,
596    224,225,226,227,228,229,230,231,
597    232,233,234,235,236,237,238,239,
598    240,241,242,243,244,245,246,215,
599    248,249,250,251,252,253,254,223,
600    224,225,226,227,228,229,230,231,
601    232,233,234,235,236,237,238,239,
602    240,241,242,243,244,245,246,247,
603    248,249,250,251,252,253,254,255,
604    0,1,2,3,4,5,6,7,
605    8,9,10,11,12,13,14,15,
606    16,17,18,19,20,21,22,23,
607    24,25,26,27,28,29,30,31,
608    32,33,34,35,36,37,38,39,
609    40,41,42,43,44,45,46,47,
610    48,49,50,51,52,53,54,55,
611    56,57,58,59,60,61,62,63,
612    64,97,98,99,100,101,102,103,
613    104,105,106,107,108,109,110,111,
614    112,113,114,115,116,117,118,119,
615    120,121,122,91,92,93,94,95,
616    96,65,66,67,68,69,70,71,
617    72,73,74,75,76,77,78,79,
618    80,81,82,83,84,85,86,87,
619    88,89,90,123,124,125,126,127,
620    128,129,130,131,132,133,134,135,
621    136,137,138,139,140,141,142,143,
622    144,145,146,147,148,149,150,151,
623    152,153,154,155,156,157,158,159,
624    160,161,162,163,164,165,166,167,
625    168,169,170,171,172,173,174,175,
626    176,177,178,179,180,181,182,183,
627    184,185,186,187,188,189,190,191,
628    224,225,226,227,228,229,230,231,
629    232,233,234,235,236,237,238,239,
630    240,241,242,243,244,245,246,215,
631    248,249,250,251,252,253,254,223,
632    192,193,194,195,196,197,198,199,
633    200,201,202,203,204,205,206,207,
634    208,209,210,211,212,213,214,247,
635    216,217,218,219,220,221,222,255,
636    0,62,0,0,1,0,0,0,
637    0,0,0,0,0,0,0,0,
638    32,0,0,0,1,0,0,0,
639    0,0,0,0,0,0,0,0,
640    0,0,0,0,0,0,255,3,
641    126,0,0,0,126,0,0,0,
642    0,0,0,0,0,0,0,0,
643    0,0,0,0,0,0,0,0,
644    0,0,0,0,0,0,255,3,
645    0,0,0,0,0,0,0,0,
646    0,0,0,0,0,0,12,2,
647    0,0,0,0,0,0,0,0,
648    0,0,0,0,0,0,0,0,
649    254,255,255,7,0,0,0,0,
650    0,0,0,0,0,0,0,0,
651    255,255,127,127,0,0,0,0,
652    0,0,0,0,0,0,0,0,
653    0,0,0,0,254,255,255,7,
654    0,0,0,0,0,4,32,4,
655    0,0,0,128,255,255,127,255,
656    0,0,0,0,0,0,255,3,
657    254,255,255,135,254,255,255,7,
658    0,0,0,0,0,4,44,6,
659    255,255,127,255,255,255,127,255,
660    0,0,0,0,254,255,255,255,
661    255,255,255,255,255,255,255,127,
662    0,0,0,0,254,255,255,255,
663    255,255,255,255,255,255,255,255,
664    0,2,0,0,255,255,255,255,
665    255,255,255,255,255,255,255,127,
666    0,0,0,0,255,255,255,255,
667    255,255,255,255,255,255,255,255,
668    0,0,0,0,254,255,0,252,
669    1,0,0,248,1,0,0,120,
670    0,0,0,0,254,255,255,255,
671    0,0,128,0,0,0,128,0,
672    255,255,255,255,0,0,0,0,
673    0,0,0,0,0,0,0,128,
674    255,255,255,255,0,0,0,0,
675    0,0,0,0,0,0,0,0,
676    128,0,0,0,0,0,0,0,
677    0,1,1,0,1,1,0,0,
678    0,0,0,0,0,0,0,0,
679    0,0,0,0,0,0,0,0,
680    1,0,0,0,128,0,0,0,
681    128,128,128,128,0,0,128,0,
682    28,28,28,28,28,28,28,28,
683    28,28,0,0,0,0,0,128,
684    0,26,26,26,26,26,26,18,
685    18,18,18,18,18,18,18,18,
686    18,18,18,18,18,18,18,18,
687    18,18,18,128,128,0,128,16,
688    0,26,26,26,26,26,26,18,
689    18,18,18,18,18,18,18,18,
690    18,18,18,18,18,18,18,18,
691    18,18,18,128,128,0,0,0,
692    0,0,0,0,0,1,0,0,
693    0,0,0,0,0,0,0,0,
694    0,0,0,0,0,0,0,0,
695    0,0,0,0,0,0,0,0,
696    1,0,0,0,0,0,0,0,
697    0,0,18,0,0,0,0,0,
698    0,0,20,20,0,18,0,0,
699    0,20,18,0,0,0,0,0,
700    18,18,18,18,18,18,18,18,
701    18,18,18,18,18,18,18,18,
702    18,18,18,18,18,18,18,0,
703    18,18,18,18,18,18,18,18,
704    18,18,18,18,18,18,18,18,
705    18,18,18,18,18,18,18,18,
706    18,18,18,18,18,18,18,0,
707    18,18,18,18,18,18,18,18
708    };
709    
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
710    
711  static int  
712  ord2utf8(int cvalue, unsigned char *buffer)  
713    #ifndef HAVE_STRERROR
714    /*************************************************
715    *     Provide strerror() for non-ANSI libraries  *
716    *************************************************/
717    
718    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
719    in their libraries, but can provide the same facility by this simple
720    alternative function. */
721    
722    extern int   sys_nerr;
723    extern char *sys_errlist[];
724    
725    char *
726    strerror(int n)
727  {  {
728  register int i, j;  if (n < 0 || n >= sys_nerr) return "unknown error number";
729  for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  return sys_errlist[n];
730    if (cvalue <= utf8_table1[i]) break;  }
731  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  #endif /* HAVE_STRERROR */
732  if (cvalue < 0) return -1;  
733  *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
734  cvalue >>= 6 - i;  /*************************************************
735  for (j = 0; j < i; j++)  *         JIT memory callback                    *
736    {  *************************************************/
737    *buffer++ = 0x80 | (cvalue & 0x3f);  
738    cvalue >>= 6;  static pcre_jit_stack* jit_callback(void *arg)
739    }  {
740  return i + 1;  return (pcre_jit_stack *)arg;
741  }  }
742    
743    
# Line 92  return i + 1; Line 749  return i + 1;
749  and returns the value of the character.  and returns the value of the character.
750    
751  Argument:  Argument:
752    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
753    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
754    
755  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
756             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
757  */  */
758    
759  int  #if !defined NOUTF8
760  utf82ord(unsigned char *buffer, int *vptr)  
761    static int
762    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
763  {  {
764  int c = *buffer++;  int c = *utf8bytes++;
765  int d = c;  int d = c;
766  int i, j, s;  int i, j, s;
767    
# Line 117  if (i == 0 || i == 6) return 0; / Line 776  if (i == 0 || i == 6) return 0; /
776    
777  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
778    
779  d = c & utf8_table3[i];  s = 6*i;
780  s = 6 - i;  d = (c & utf8_table3[i]) << s;
781    
782  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
783    {    {
784    c = *buffer++;    c = *utf8bytes++;
785    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
786      s -= 6;
787    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
788    }    }
789    
790  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
791    
792  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
793    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
794  if (j != i) return -(i+1);  if (j != i) return -(i+1);
795    
# Line 140  if (j != i) return -(i+1); Line 799  if (j != i) return -(i+1);
799  return i+1;  return i+1;
800  }  }
801    
802    #endif
803    
804    
805    
806    /*************************************************
807    *       Convert character value to UTF-8         *
808    *************************************************/
809    
810    /* This function takes an integer value in the range 0 - 0x7fffffff
811    and encodes it as a UTF-8 character in 1 to 6 bytes.
812    
813  /* Debugging function to print the internal form of the regex. This is the same  Arguments:
814  code as contained in pcre.c under the DEBUG macro. */    cvalue     the character value
815      utf8bytes  pointer to buffer for result - at least 6 bytes long
816    
817  static const char *OP_names[] = {  Returns:     number of characters placed in the buffer
818    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  */
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
819    
820    #if !defined NOUTF8
821    
822  static void print_internals(pcre *re)  static int
823    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
824  {  {
825  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
826    for (i = 0; i < utf8_table1_size; i++)
827      if (cvalue <= utf8_table1[i]) break;
828    utf8bytes += i;
829    for (j = i; j > 0; j--)
830     {
831     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
832     cvalue >>= 6;
833     }
834    *utf8bytes = utf8_table2[i] | cvalue;
835    return i + 1;
836    }
837    
838    #endif
839    
840    
841    
842    #ifdef SUPPORT_PCRE16
843    /*************************************************
844    *         Convert a string to 16-bit             *
845    *************************************************/
846    
847    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
848    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
849    double, because values up to 0x7f use 1 byte in UTF-8 and 2 in UTF-16, values
850    up to 0xffff use 2 or 3 bytes in UTF-8 and 2 in UTF-16, and higher values use
851    4 bytes in both. The result is always left in buffer16.
852    
853  fprintf(outfile, "------------------------------------------------------------------\n");  Arguments:
854      p          points to a byte string
855      utf        true if UTF-8 (to be converted to UTF-16)
856      len        number of bytes in the string (excluding trailing zero)
857    
858    Returns:     number of 16-bit data items used (excluding trailing zero)
859                 OR -1 if a UTF-8 string is malformed
860    */
861    
862  for(;;)  static int
863    to16(pcre_uint8 *p, int utf, int len)
864    {
865    pcre_uint16 *pp;
866    
867    if (buffer16_size < 2*len + 2)
868      {
869      if (buffer16 != NULL) free(buffer16);
870      buffer16_size = 2*len + 2;
871      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
872      if (buffer16 == NULL)
873        {
874        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
875        exit(1);
876        }
877      }
878    
879    pp = buffer16;
880    
881    if (!utf)
882      {
883      while (len-- > 0) *pp++ = *p++;
884      }
885    
886    else
887    {    {
888    int c;    int c;
889    int charlength;    while (len > 0)
890        {
891        int chlen = utf82ord(p, &c);
892        if (chlen <= 0) return -1;
893        p += chlen;
894        len -= chlen;
895        if (c < 0x10000) *pp++ = c; else
896          {
897          c -= 0x10000;
898          *pp++ = 0xD800 | (c >> 10);
899          *pp++ = 0xDC00 | (c & 0x3ff);
900          }
901        }
902      }
903    
904    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  *pp = 0;
905    return pp - buffer16;
906    }
907    #endif
908    
909    
910    /*************************************************
911    *        Read or extend an input line            *
912    *************************************************/
913    
914    /* Input lines are read into buffer, but both patterns and data lines can be
915    continued over multiple input lines. In addition, if the buffer fills up, we
916    want to automatically expand it so as to be able to handle extremely large
917    lines that are needed for certain stress tests. When the input buffer is
918    expanded, the other two buffers must also be expanded likewise, and the
919    contents of pbuffer, which are a copy of the input for callouts, must be
920    preserved (for when expansion happens for a data line). This is not the most
921    optimal way of handling this, but hey, this is just a test program!
922    
923    Arguments:
924      f            the file to read
925      start        where in buffer to start (this *must* be within buffer)
926      prompt       for stdin or readline()
927    
928    Returns:       pointer to the start of new data
929                   could be a copy of start, or could be moved
930                   NULL if no data read and EOF reached
931    */
932    
933    if (*code >= OP_BRA)  static pcre_uint8 *
934    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
935    {
936    pcre_uint8 *here = start;
937    
938    for (;;)
939      {
940      int rlen = (int)(buffer_size - (here - buffer));
941    
942      if (rlen > 1000)
943      {      {
944      if (*code - OP_BRA > EXTRACT_BASIC_MAX)      int dlen;
945        fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
946        /* If libreadline support is required, use readline() to read a line if the
947        input is a terminal. Note that readline() removes the trailing newline, so
948        we must put it back again, to be compatible with fgets(). */
949    
950    #ifdef SUPPORT_LIBREADLINE
951        if (isatty(fileno(f)))
952          {
953          size_t len;
954          char *s = readline(prompt);
955          if (s == NULL) return (here == start)? NULL : start;
956          len = strlen(s);
957          if (len > 0) add_history(s);
958          if (len > rlen - 1) len = rlen - 1;
959          memcpy(here, s, len);
960          here[len] = '\n';
961          here[len+1] = 0;
962          free(s);
963          }
964      else      else
965        fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  #endif
966      code += 2;  
967        /* Read the next line by normal means, prompting if the file is stdin. */
968    
969          {
970          if (f == stdin) printf("%s", prompt);
971          if (fgets((char *)here, rlen,  f) == NULL)
972            return (here == start)? NULL : start;
973          }
974    
975        dlen = (int)strlen((char *)here);
976        if (dlen > 0 && here[dlen - 1] == '\n') return start;
977        here += dlen;
978      }      }
979    
980    else switch(*code)    else
981      {      {
982      case OP_END:      int new_buffer_size = 2*buffer_size;
983      fprintf(outfile, "    %s\n", OP_names[*code]);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
984      fprintf(outfile, "------------------------------------------------------------------\n");      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
985      return;      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
986    
987        CLASS_REF_REPEAT:      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
988          {
989          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
990          exit(1);
991          }
992    
993        switch(*code)      memcpy(new_buffer, buffer, buffer_size);
994          {      memcpy(new_pbuffer, pbuffer, buffer_size);
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
995    
996          case OP_CRRANGE:      buffer_size = new_buffer_size;
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
997    
998          default:      start = new_buffer + (start - buffer);
999          code--;      here = new_buffer + (here - buffer);
         }  
       }  
     break;  
1000    
1001      /* Anything else is just a one-node item */      free(buffer);
1002        free(dbuffer);
1003        free(pbuffer);
1004    
1005      default:      buffer = new_buffer;
1006      fprintf(outfile, "    %s", OP_names[*code]);      dbuffer = new_dbuffer;
1007      break;      pbuffer = new_pbuffer;
1008      }      }
1009      }
1010    
1011    return NULL;  /* Control never gets here */
1012    }
1013    
1014    
1015    
1016    code++;  /*************************************************
1017    fprintf(outfile, "\n");  *          Read number from string               *
1018    *************************************************/
1019    
1020    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1021    around with conditional compilation, just do the job by hand. It is only used
1022    for unpicking arguments, so just keep it simple.
1023    
1024    Arguments:
1025      str           string to be converted
1026      endptr        where to put the end pointer
1027    
1028    Returns:        the value, as an int
1029    */
1030    
1031    static int
1032    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1033    {
1034    int result = 0;
1035    while(*str != 0 && isspace(*str)) str++;
1036    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1037    *endptr = str;
1038    return(result);
1039    }
1040    
1041    
1042    
1043    /*************************************************
1044    *             Print one character                *
1045    *************************************************/
1046    
1047    /* Print a single character either literally, or as a hex escape. */
1048    
1049    static int pchar(int c, FILE *f)
1050    {
1051    if (PRINTOK(c))
1052      {
1053      if (f != NULL) fprintf(f, "%c", c);
1054      return 1;
1055    }    }
1056    
1057    if (c < 0x100)
1058      {
1059      if (use_utf)
1060        {
1061        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1062        return 6;
1063        }
1064      else
1065        {
1066        if (f != NULL) fprintf(f, "\\x%02x", c);
1067        return 4;
1068        }
1069      }
1070    
1071    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1072    return (c <= 0x000000ff)? 6 :
1073           (c <= 0x00000fff)? 7 :
1074           (c <= 0x0000ffff)? 8 :
1075           (c <= 0x000fffff)? 9 : 10;
1076  }  }
1077    
1078    
1079    
1080  /* Character string printing function. A "normal" and a UTF-8 version. */  #ifdef SUPPORT_PCRE8
1081    /*************************************************
1082    *         Print 8-bit character string           *
1083    *************************************************/
1084    
1085    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1086    If handed a NULL file, just counts chars without printing. */
1087    
1088  static void pchars(unsigned char *p, int length, int utf8)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1089  {  {
1090  int c;  int c = 0;
1091    int yield = 0;
1092    
1093  while (length-- > 0)  while (length-- > 0)
1094    {    {
1095    if (utf8)  #if !defined NOUTF8
1096      if (use_utf)
1097      {      {
1098      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
1099      if (rc > 0)      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1100        {        {
1101        length -= rc - 1;        length -= rc - 1;
1102        p += rc;        p += rc;
1103        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);        yield += pchar(c, f);
1104          else fprintf(outfile, "\\x{%02x}", c);        continue;
       continue;  
1105        }        }
1106      }      }
1107    #endif
1108      c = *p++;
1109      yield += pchar(c, f);
1110      }
1111    
1112    return yield;
1113    }
1114    #endif
1115    
1116    
1117    
1118    #ifdef SUPPORT_PCRE16
1119    /*************************************************
1120    *           Print 16-bit character string        *
1121    *************************************************/
1122    
1123    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1124    If handed a NULL file, just counts chars without printing. */
1125    
1126    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1127    {
1128    int yield = 0;
1129    
1130    while (length-- > 0)
1131      {
1132      int c = *p++ & 0xffff;
1133    #if !defined NOUTF8
1134      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1135        {
1136        int d = *p & 0xffff;
1137        if (d >= 0xDC00 && d < 0xDFFF)
1138          {
1139          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1140          length--;
1141          p++;
1142          }
1143        }
1144    #endif
1145      yield += pchar(c, f);
1146      }
1147    
1148    return yield;
1149    }
1150    #endif
1151    
    /* Not UTF-8, or malformed UTF-8  */  
1152    
1153    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
1154      else fprintf(outfile, "\\x%02x", c);  /*************************************************
1155    *              Callout function                  *
1156    *************************************************/
1157    
1158    /* Called from PCRE as a result of the (?C) item. We print out where we are in
1159    the match. Yield zero unless more callouts than the fail count, or the callout
1160    data is not zero. */
1161    
1162    static int callout(pcre_callout_block *cb)
1163    {
1164    FILE *f = (first_callout | callout_extra)? outfile : NULL;
1165    int i, pre_start, post_start, subject_length;
1166    
1167    if (callout_extra)
1168      {
1169      fprintf(f, "Callout %d: last capture = %d\n",
1170        cb->callout_number, cb->capture_last);
1171    
1172      for (i = 0; i < cb->capture_top * 2; i += 2)
1173        {
1174        if (cb->offset_vector[i] < 0)
1175          fprintf(f, "%2d: <unset>\n", i/2);
1176        else
1177          {
1178          fprintf(f, "%2d: ", i/2);
1179          PCHARSV(cb->subject + cb->offset_vector[i],
1180            cb->offset_vector[i+1] - cb->offset_vector[i], f);
1181          fprintf(f, "\n");
1182          }
1183        }
1184      }
1185    
1186    /* Re-print the subject in canonical form, the first time or if giving full
1187    details. On subsequent calls in the same match, we use pchars just to find the
1188    printed lengths of the substrings. */
1189    
1190    if (f != NULL) fprintf(f, "--->");
1191    
1192    PCHARS(pre_start, cb->subject, cb->start_match, f);
1193    PCHARS(post_start, cb->subject + cb->start_match,
1194      cb->current_position - cb->start_match, f);
1195    
1196    PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1197    
1198    PCHARSV(cb->subject + cb->current_position,
1199      cb->subject_length - cb->current_position, f);
1200    
1201    if (f != NULL) fprintf(f, "\n");
1202    
1203    /* Always print appropriate indicators, with callout number if not already
1204    shown. For automatic callouts, show the pattern offset. */
1205    
1206    if (cb->callout_number == 255)
1207      {
1208      fprintf(outfile, "%+3d ", cb->pattern_position);
1209      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1210      }
1211    else
1212      {
1213      if (callout_extra) fprintf(outfile, "    ");
1214        else fprintf(outfile, "%3d ", cb->callout_number);
1215      }
1216    
1217    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1218    fprintf(outfile, "^");
1219    
1220    if (post_start > 0)
1221      {
1222      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1223      fprintf(outfile, "^");
1224      }
1225    
1226    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1227      fprintf(outfile, " ");
1228    
1229    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1230      pbuffer + cb->pattern_position);
1231    
1232    fprintf(outfile, "\n");
1233    first_callout = 0;
1234    
1235    if (cb->mark != last_callout_mark)
1236      {
1237      fprintf(outfile, "Latest Mark: %s\n",
1238        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1239      last_callout_mark = cb->mark;
1240      }
1241    
1242    if (cb->callout_data != NULL)
1243      {
1244      int callout_data = *((int *)(cb->callout_data));
1245      if (callout_data != 0)
1246        {
1247        fprintf(outfile, "Callout data = %d\n", callout_data);
1248        return callout_data;
1249        }
1250      }
1251    
1252    return (cb->callout_number != callout_fail_id)? 0 :
1253           (++callout_count >= callout_fail_count)? 1 : 0;
1254    }
1255    
1256    
1257    /*************************************************
1258    *            Local malloc functions              *
1259    *************************************************/
1260    
1261    /* Alternative malloc function, to test functionality and save the size of a
1262    compiled re, which is the first store request that pcre_compile() makes. The
1263    show_malloc variable is set only during matching. */
1264    
1265    static void *new_malloc(size_t size)
1266    {
1267    void *block = malloc(size);
1268    gotten_store = size;
1269    if (first_gotten_store == 0) first_gotten_store = size;
1270    if (show_malloc)
1271      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1272    return block;
1273    }
1274    
1275    static void new_free(void *block)
1276    {
1277    if (show_malloc)
1278      fprintf(outfile, "free             %p\n", block);
1279    free(block);
1280    }
1281    
1282    /* For recursion malloc/free, to test stacking calls */
1283    
1284    static void *stack_malloc(size_t size)
1285    {
1286    void *block = malloc(size);
1287    if (show_malloc)
1288      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1289    return block;
1290    }
1291    
1292    static void stack_free(void *block)
1293    {
1294    if (show_malloc)
1295      fprintf(outfile, "stack_free       %p\n", block);
1296    free(block);
1297    }
1298    
1299    
1300    /*************************************************
1301    *          Call pcre_fullinfo()                  *
1302    *************************************************/
1303    
1304    /* Get one piece of information from the pcre_fullinfo() function. When only
1305    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1306    value, but the code is defensive. */
1307    
1308    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1309    {
1310    int rc;
1311    
1312    if (use_pcre16)
1313    #ifdef SUPPORT_PCRE16
1314      rc = pcre16_fullinfo(re, study, option, ptr);
1315    #else
1316      rc = PCRE_ERROR_BADMODE;
1317    #endif
1318    else
1319    #ifdef SUPPORT_PCRE8
1320      rc = pcre_fullinfo(re, study, option, ptr);
1321    #else
1322      rc = PCRE_ERROR_BADMODE;
1323    #endif
1324    
1325    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1326      use_pcre16? "16" : "", option);
1327    }
1328    
1329    
1330    
1331    /*************************************************
1332    *         Byte flipping function                 *
1333    *************************************************/
1334    
1335    static unsigned long int
1336    byteflip(unsigned long int value, int n)
1337    {
1338    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
1339    return ((value & 0x000000ff) << 24) |
1340           ((value & 0x0000ff00) <<  8) |
1341           ((value & 0x00ff0000) >>  8) |
1342           ((value & 0xff000000) >> 24);
1343    }
1344    
1345    
1346    
1347    
1348    /*************************************************
1349    *        Check match or recursion limit          *
1350    *************************************************/
1351    
1352    static int
1353    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1354      int start_offset, int options, int *use_offsets, int use_size_offsets,
1355      int flag, unsigned long int *limit, int errnumber, const char *msg)
1356    {
1357    int count;
1358    int min = 0;
1359    int mid = 64;
1360    int max = -1;
1361    
1362    extra->flags |= flag;
1363    
1364    for (;;)
1365      {
1366      *limit = mid;
1367    
1368      PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1369        use_offsets, use_size_offsets);
1370    
1371      if (count == errnumber)
1372        {
1373        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1374        min = mid;
1375        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1376        }
1377    
1378      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1379                             count == PCRE_ERROR_PARTIAL)
1380        {
1381        if (mid == min + 1)
1382          {
1383          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1384          break;
1385          }
1386        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1387        max = mid;
1388        mid = (min + mid)/2;
1389        }
1390      else break;    /* Some other error */
1391      }
1392    
1393    extra->flags &= ~flag;
1394    return count;
1395    }
1396    
1397    
1398    
1399    /*************************************************
1400    *         Case-independent strncmp() function    *
1401    *************************************************/
1402    
1403    /*
1404    Arguments:
1405      s         first string
1406      t         second string
1407      n         number of characters to compare
1408    
1409    Returns:    < 0, = 0, or > 0, according to the comparison
1410    */
1411    
1412    static int
1413    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1414    {
1415    while (n--)
1416      {
1417      int c = tolower(*s++) - tolower(*t++);
1418      if (c) return c;
1419    }    }
1420    return 0;
1421  }  }
1422    
1423    
1424    
1425  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
1426  compiled re. */  *         Check newline indicator                *
1427    *************************************************/
1428    
1429    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1430    a message and return 0 if there is no match.
1431    
1432    Arguments:
1433      p           points after the leading '<'
1434      f           file for error message
1435    
1436  static void *new_malloc(size_t size)  Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1437    */
1438    
1439    static int
1440    check_newline(pcre_uint8 *p, FILE *f)
1441  {  {
1442  gotten_store = size;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1443  if (log_store)  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1444    fprintf(outfile, "Memory allocation (code space): %d\n",  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1445      (int)((int)size - offsetof(real_pcre, code[0])));  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1446  return malloc(size);  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1447    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1448    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1449    fprintf(f, "Unknown newline type at: <%s\n", p);
1450    return 0;
1451  }  }
1452    
1453    
1454    
1455    /*************************************************
1456    *             Usage function                     *
1457    *************************************************/
1458    
/* Print the command-line help text for pcretest on stdout. The text is held
in a table of strings that are written out in order; entries for optional
features (readline, 16-bit support, DFA matching, the POSIX interface) are
selected at compile time by the same preprocessor symbols that guard those
features. The function takes no arguments and returns nothing. */

static void
usage(void)
{
static const char *const help_text[] = {
  "Usage:     pcretest [options] [<input file> [<output file>]]\n\n",
  "Input and output default to stdin and stdout.\n",
#ifdef SUPPORT_LIBREADLINE
  "If input is a terminal, readline() is used to read from it.\n",
#else
  "This version of pcretest is not linked with readline().\n",
#endif
  "\nOptions:\n",
#ifdef SUPPORT_PCRE16
  "  -16      use 16-bit interface\n",
#endif
  "  -b       show compiled code (bytecode)\n",
  "  -C       show PCRE compile-time options and exit\n",
  "  -d       debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  "  -dfa     force DFA matching for all subjects\n",
#endif
  "  -help    show usage information\n",
  "  -i       show information about compiled patterns\n",
  "  -M       find MATCH_LIMIT minimum for each subject\n",
  "  -m       output memory used information\n",
  "  -o <n>   set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  "  -p       use POSIX interface\n",
#endif
  "  -q       quiet: do not output PCRE version number at start\n",
  "  -S <n>   set stack size to <n> megabytes\n",
  "  -s       force each pattern to be studied at basic level\n",
  "  -s+      force each pattern to be studied, using JIT if available\n",
  "  -t       time compilation and execution\n",
  "  -t <n>   time compilation and execution, repeating <n> times\n",
  "  -tm      time execution (matching) only\n",
  "  -tm <n>  time execution (matching) only, repeating <n> times\n"
};
unsigned int idx;

/* None of the strings contains a '%', so writing them verbatim produces
exactly what passing each one to printf() as a format would produce. */

for (idx = 0; idx < sizeof(help_text)/sizeof(help_text[0]); idx++)
  fputs(help_text[idx], stdout);
}
1496    
1497    
1498    
1499    /*************************************************
1500    *                Main Program                    *
1501    *************************************************/
1502    
1503  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
1504  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 429  int main(int argc, char **argv) Line 1509  int main(int argc, char **argv)
1509  FILE *infile = stdin;  FILE *infile = stdin;
1510  int options = 0;  int options = 0;
1511  int study_options = 0;  int study_options = 0;
1512    int default_find_match_limit = FALSE;
1513  int op = 1;  int op = 1;
1514  int timeit = 0;  int timeit = 0;
1515    int timeitm = 0;
1516  int showinfo = 0;  int showinfo = 0;
1517  int showstore = 0;  int showstore = 0;
1518    int force_study = -1;
1519    int force_study_options = 0;
1520    int quiet = 0;
1521  int size_offsets = 45;  int size_offsets = 45;
1522  int size_offsets_max;  int size_offsets_max;
1523  int *offsets;  int *offsets = NULL;
1524  #if !defined NOPOSIX  #if !defined NOPOSIX
1525  int posix = 0;  int posix = 0;
1526  #endif  #endif
1527  int debug = 0;  int debug = 0;
1528  int done = 0;  int done = 0;
1529  unsigned char buffer[30000];  int all_use_dfa = 0;
1530  unsigned char dbuffer[1024];  int yield = 0;
1531    int stack_size;
1532    
1533    pcre_jit_stack *jit_stack = NULL;
1534    
1535    /* These vectors store, end-to-end, a list of captured substring names. Assume
1536    that 1024 is plenty long enough for the few names we'll be testing. */
1537    
1538    pcre_uchar copynames[1024];
1539    pcre_uchar getnames[1024];
1540    
1541    pcre_uchar *copynamesptr;
1542    pcre_uchar *getnamesptr;
1543    
1544    /* Get buffers from malloc() so that valgrind will check their misuse when
1545    debugging. They grow automatically when very long lines are read. The 16-bit
1546    buffer (buffer16) is obtained only if needed. */
1547    
1548    buffer = (pcre_uint8 *)malloc(buffer_size);
1549    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1550    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1551    
1552  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
1553    
1554  outfile = stdout;  outfile = stdout;
1555    
1556    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1557    library to translate CRLF into a single LF character. At least, that's what
1558    I've been told: never having used Windows I take this all on trust. Originally
1559    it set 0x8000, but then I was advised that _O_BINARY was better. */
1560    
1561    #if defined(_WIN32) || defined(WIN32)
1562    _setmode( _fileno( stdout ), _O_BINARY );
1563    #endif
1564    
1565  /* Scan options */  /* Scan options */
1566    
1567  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1568    {    {
1569    char *endptr;    pcre_uint8 *endptr;
1570    
1571      if (strcmp(argv[op], "-m") == 0) showstore = 1;
1572      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1573      else if (strcmp(argv[op], "-s+") == 0)
1574        {
1575        force_study = 1;
1576        force_study_options = PCRE_STUDY_JIT_COMPILE;
1577        }
1578    #ifdef SUPPORT_PCRE16
1579      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1580    #endif
1581    
1582    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1583      showstore = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
1584    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1585    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1586      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1587    #if !defined NODFA
1588      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1589    #endif
1590    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1591        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1592            *endptr == 0))
1593        {
1594        op++;
1595        argc--;
1596        }
1597      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1598        {
1599        int both = argv[op][2] == 0;
1600        int temp;
1601        if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1602                         *endptr == 0))
1603          {
1604          timeitm = temp;
1605          op++;
1606          argc--;
1607          }
1608        else timeitm = LOOPREPEAT;
1609        if (both) timeit = timeitm;
1610        }
1611      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1612          ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1613            *endptr == 0))
1614      {      {
1615    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1616        printf("PCRE: -S not supported on this OS\n");
1617        exit(1);
1618    #else
1619        int rc;
1620        struct rlimit rlim;
1621        getrlimit(RLIMIT_STACK, &rlim);
1622        rlim.rlim_cur = stack_size * 1024 * 1024;
1623        rc = setrlimit(RLIMIT_STACK, &rlim);
1624        if (rc != 0)
1625          {
1626        printf("PCRE: setrlimit() failed with error %d\n", rc);
1627        exit(1);
1628          }
1629      op++;      op++;
1630      argc--;      argc--;
1631    #endif
1632      }      }
1633  #if !defined NOPOSIX  #if !defined NOPOSIX
1634    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1635  #endif  #endif
1636      else if (strcmp(argv[op], "-C") == 0)
1637        {
1638        int rc;
1639        unsigned long int lrc;
1640        printf("PCRE version %s\n", pcre_version());
1641        printf("Compiled with\n");
1642    
1643    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1644    are set, either both UTFs are supported or both are not supported. */
1645    
1646    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1647        printf("  8-bit and 16-bit support\n");
1648        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1649        if (rc)
1650          printf("  UTF-8 and UTF-16 support\n");
1651        else
1652          printf("  No UTF-8 or UTF-16 support\n");
1653    #elif defined SUPPORT_PCRE8
1654        printf("  8-bit support only\n");
1655        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1656        printf("  %sUTF-8 support\n", rc? "" : "No ");
1657    #else
1658        printf("  16-bit support only\n");
1659        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1660        printf("  %sUTF-16 support\n", rc? "" : "No ");
1661    #endif
1662    
1663        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1664        printf("  %sUnicode properties support\n", rc? "" : "No ");
1665        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1666        if (rc)
1667          printf("  Just-in-time compiler support\n");
1668        else
1669          printf("  No just-in-time compiler support\n");
1670        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1671        /* Note that these values are always the ASCII values, even
1672        in EBCDIC environments. CR is 13 and NL is 10. */
1673        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1674          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1675          (rc == -2)? "ANYCRLF" :
1676          (rc == -1)? "ANY" : "???");
1677        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1678        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1679                                         "all Unicode newlines");
1680        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1681        printf("  Internal link size = %d\n", rc);
1682        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1683        printf("  POSIX malloc threshold = %d\n", rc);
1684        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1685        printf("  Default match limit = %ld\n", lrc);
1686        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1687        printf("  Default recursion depth limit = %ld\n", lrc);
1688        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1689        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1690        goto EXIT;
1691        }
1692      else if (strcmp(argv[op], "-help") == 0 ||
1693               strcmp(argv[op], "--help") == 0)
1694        {
1695        usage();
1696        goto EXIT;
1697        }
1698    else    else
1699      {      {
1700      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1701      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1702      printf("  -d     debug: show compiled code; implies -i\n"      yield = 1;
1703             "  -i     show information about compiled pattern\n"      goto EXIT;
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
1704      }      }
1705    op++;    op++;
1706    argc--;    argc--;
# Line 489  while (argc > 1 && argv[op][0] == '-') Line 1709  while (argc > 1 && argv[op][0] == '-')
1709  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
1710    
1711  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
1712  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
1713  if (offsets == NULL)  if (offsets == NULL)
1714    {    {
1715    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1716      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1717    return 1;    yield = 1;
1718      goto EXIT;
1719    }    }
1720    
1721  /* Sort out the input and output files */  /* Sort out the input and output files */
1722    
1723  if (argc > 1)  if (argc > 1)
1724    {    {
1725    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1726    if (infile == NULL)    if (infile == NULL)
1727      {      {
1728      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1729      return 1;      yield = 1;
1730        goto EXIT;
1731      }      }
1732    }    }
1733    
1734  if (argc > 2)  if (argc > 2)
1735    {    {
1736    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1737    if (outfile == NULL)    if (outfile == NULL)
1738      {      {
1739      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1740      return 1;      yield = 1;
1741        goto EXIT;
1742      }      }
1743    }    }
1744    
1745  /* Set alternative malloc function */  /* Set alternative malloc function */
1746    
1747    #ifdef SUPPORT_PCRE8
1748  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1749    pcre_free = new_free;
1750    pcre_stack_malloc = stack_malloc;
1751    pcre_stack_free = stack_free;
1752    #endif
1753    
1754    #ifdef SUPPORT_PCRE16
1755    pcre16_malloc = new_malloc;
1756    pcre16_free = new_free;
1757    pcre16_stack_malloc = stack_malloc;
1758    pcre16_stack_free = stack_free;
1759    #endif
1760    
1761  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1762    
1763  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1764    
1765  /* Main loop */  /* Main loop */
1766    
# Line 540  while (!done) Line 1775  while (!done)
1775  #endif  #endif
1776    
1777    const char *error;    const char *error;
1778    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
1779    const unsigned char *tables = NULL;    pcre_uint8 *p, *pp, *ppp;
1780      pcre_uint8 *to_file = NULL;
1781      const pcre_uint8 *tables = NULL;
1782      unsigned long int true_size, true_study_size = 0;
1783      size_t size, regex_gotten_store;
1784      int do_allcaps = 0;
1785      int do_mark = 0;
1786    int do_study = 0;    int do_study = 0;
1787      int no_force_study = 0;
1788    int do_debug = debug;    int do_debug = debug;
1789    int do_G = 0;    int do_G = 0;
1790    int do_g = 0;    int do_g = 0;
1791    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1792    int do_showrest = 0;    int do_showrest = 0;
1793    int utf8 = 0;    int do_showcaprest = 0;
1794    int erroroffset, len, delimiter;    int do_flip = 0;
1795      int erroroffset, len, delimiter, poffset;
1796    
1797      use_utf = 0;
1798      debug_lengths = 1;
1799    
1800    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1801    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1802      fflush(outfile);
1803    
1804    p = buffer;    p = buffer;
1805    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1806    if (*p == 0) continue;    if (*p == 0) continue;
1807    
1808    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1809    complete, read more. */  
1810      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1811        {
1812        unsigned long int magic, get_options;
1813        pcre_uint8 sbuf[8];
1814        FILE *f;
1815    
1816        p++;
1817        pp = p + (int)strlen((char *)p);
1818        while (isspace(pp[-1])) pp--;
1819        *pp = 0;
1820    
1821        f = fopen((char *)p, "rb");
1822        if (f == NULL)
1823          {
1824          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1825          continue;
1826          }
1827    
1828        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1829    
1830        true_size =
1831          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1832        true_study_size =
1833          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1834    
1835        re = (real_pcre *)new_malloc(true_size);
1836        regex_gotten_store = first_gotten_store;
1837    
1838        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1839    
1840        magic = ((real_pcre *)re)->magic_number;
1841        if (magic != MAGIC_NUMBER)
1842          {
1843          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1844            {
1845            do_flip = 1;
1846            }
1847          else
1848            {
1849            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1850            fclose(f);
1851            continue;
1852            }
1853          }
1854    
1855        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1856          do_flip? " (byte-inverted)" : "", p);
1857    
1858        /* Need to know if UTF-8 for printing data strings */
1859    
1860        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1861        use_utf = (get_options & PCRE_UTF8) != 0;
1862    
1863        /* Now see if there is any following study data. */
1864    
1865        if (true_study_size != 0)
1866          {
1867          pcre_study_data *psd;
1868    
1869          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1870          extra->flags = PCRE_EXTRA_STUDY_DATA;
1871    
1872          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1873          extra->study_data = psd;
1874    
1875          if (fread(psd, 1, true_study_size, f) != true_study_size)
1876            {
1877            FAIL_READ:
1878            fprintf(outfile, "Failed to read data from %s\n", p);
1879            if (extra != NULL)
1880              {
1881              PCRE_FREE_STUDY(extra);
1882              }
1883            if (re != NULL) new_free(re);
1884            fclose(f);
1885            continue;
1886            }
1887          fprintf(outfile, "Study data loaded from %s\n", p);
1888          do_study = 1;     /* To get the data output if requested */
1889          }
1890        else fprintf(outfile, "No study data\n");
1891    
1892        fclose(f);
1893        goto SHOW_INFO;
1894        }
1895    
1896      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1897      the pattern; if is isn't complete, read more. */
1898    
1899    delimiter = *p++;    delimiter = *p++;
1900    
1901    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1902      {      {
1903      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1904      goto SKIP_DATA;      goto SKIP_DATA;
1905      }      }
1906    
1907    pp = p;    pp = p;
1908      poffset = (int)(p - buffer);
1909    
1910    for(;;)    for(;;)
1911      {      {
# Line 581  while (!done) Line 1916  while (!done)
1916        pp++;        pp++;
1917        }        }
1918      if (*pp != 0) break;      if (*pp != 0) break;
1919        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1920        {        {
1921        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1922        done = 1;        done = 1;
# Line 599  while (!done) Line 1925  while (!done)
1925      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1926      }      }
1927    
1928      /* The buffer may have moved while being extended; reset the start of data
1929      pointer to the correct relative point in the buffer. */
1930    
1931      p = buffer + poffset;
1932    
1933    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1934    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1935    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1936    
1937    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1938    
1939    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1940      for callouts. */
1941    
1942    *pp++ = 0;    *pp++ = 0;
1943      strcpy((char *)pbuffer, (char *)p);
1944    
1945    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1946    
# Line 619  while (!done) Line 1952  while (!done)
1952      {      {
1953      switch (*pp++)      switch (*pp++)
1954        {        {
1955          case 'f': options |= PCRE_FIRSTLINE; break;
1956        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1957        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1958        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1959        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1960        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1961    
1962        case '+': do_showrest = 1; break;        case '+':
1963          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1964          break;
1965    
1966          case '=': do_allcaps = 1; break;
1967        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1968          case 'B': do_debug = 1; break;
1969          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1970        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1971        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1972          case 'F': do_flip = 1; break;
1973        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1974        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1975          case 'J': options |= PCRE_DUPNAMES; break;
1976          case 'K': do_mark = 1; break;
1977        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1978          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1979    
1980  #if !defined NOPOSIX  #if !defined NOPOSIX
1981        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1982  #endif  #endif
1983    
1984        case 'S': do_study = 1; break;        case 'S':
1985          if (do_study == 0)
1986            {
1987            do_study = 1;
1988            if (*pp == '+')
1989              {
1990              study_options |= PCRE_STUDY_JIT_COMPILE;
1991              pp++;
1992              }
1993            }
1994          else
1995            {
1996            do_study = 0;
1997            no_force_study = 1;
1998            }
1999          break;
2000    
2001        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2002          case 'W': options |= PCRE_UCP; break;
2003        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2004        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2005          case 'Z': debug_lengths = 0; break;
2006          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2007          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2008    
2009          case 'T':
2010          switch (*pp++)
2011            {
2012            case '0': tables = tables0; break;
2013            case '1': tables = tables1; break;
2014    
2015            case '\r':
2016            case '\n':
2017            case ' ':
2018            case 0:
2019            fprintf(outfile, "** Missing table number after /T\n");
2020            goto SKIP_DATA;
2021    
2022            default:
2023            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2024            goto SKIP_DATA;
2025            }
2026          break;
2027    
2028        case 'L':        case 'L':
2029        ppp = pp;        ppp = pp;
2030        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
2031          /* The '0' test is just in case this is an unterminated line. */
2032          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2033        *ppp = 0;        *ppp = 0;
2034        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2035          {          {
2036          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2037          goto SKIP_DATA;          goto SKIP_DATA;
2038          }          }
2039          locale_set = 1;
2040        tables = pcre_maketables();        tables = pcre_maketables();
2041        pp = ppp;        pp = ppp;
2042        break;        break;
2043    
2044        case '\n': case ' ': break;        case '>':
2045          to_file = pp;
2046          while (*pp != 0) pp++;
2047          while (isspace(pp[-1])) pp--;
2048          *pp = 0;
2049          break;
2050    
2051          case '<':
2052            {
2053            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2054              {
2055              options |= PCRE_JAVASCRIPT_COMPAT;
2056              pp += 3;
2057              }
2058            else
2059              {
2060              int x = check_newline(pp, outfile);
2061              if (x == 0) goto SKIP_DATA;
2062              options |= x;
2063              while (*pp++ != '>');
2064              }
2065            }
2066          break;
2067    
2068          case '\r':                      /* So that it works in Windows */
2069          case '\n':
2070          case ' ':
2071          break;
2072    
2073        default:        default:
2074        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2075        goto SKIP_DATA;        goto SKIP_DATA;
# Line 664  while (!done) Line 2078  while (!done)
2078    
2079    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2080    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2081    local character tables. */    local character tables. Neither does it have 16-bit support. */
2082    
2083  #if !defined NOPOSIX  #if !defined NOPOSIX
2084    if (posix || do_posix)    if (posix || do_posix)
2085      {      {
2086      int rc;      int rc;
2087      int cflags = 0;      int cflags = 0;
2088    
2089      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2090      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2091        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2092        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2093        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2094        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2095        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2096    
2097        first_gotten_store = 0;
2098      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2099    
2100      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 680  while (!done) Line 2102  while (!done)
2102    
2103      if (rc != 0)      if (rc != 0)
2104        {        {
2105        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2106        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2107        goto SKIP_DATA;        goto SKIP_DATA;
2108        }        }
# Line 692  while (!done) Line 2114  while (!done)
2114  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2115    
2116      {      {
2117      if (timeit)      unsigned long int get_options;
2118    
2119        /* In 16-bit mode, convert the input. */
2120    
2121    #ifdef SUPPORT_PCRE16
2122        if (use_pcre16)
2123          {
2124          if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2125            {
2126            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2127              "converted to UTF-16\n");
2128            goto SKIP_DATA;
2129            }
2130          p = (pcre_uint8 *)buffer16;
2131          }
2132    #endif
2133    
2134        /* Compile many times when timing */
2135    
2136        if (timeit > 0)
2137        {        {
2138        register int i;        register int i;
2139        clock_t time_taken;        clock_t time_taken;
2140        clock_t start_time = clock();        clock_t start_time = clock();
2141        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
2142          {          {
2143          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2144          if (re != NULL) free(re);          if (re != NULL) free(re);
2145          }          }
2146        time_taken = clock() - start_time;        time_taken = clock() - start_time;
2147        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
2148          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
2149          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
2150        }        }
2151    
2152      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2153        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2154    
2155      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2156      if non-interactive. */      if non-interactive. */
# Line 721  while (!done) Line 2163  while (!done)
2163          {          {
2164          for (;;)          for (;;)
2165            {            {
2166            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2167              {              {
2168              done = 1;              done = 1;
2169              goto CONTINUE;              goto CONTINUE;
# Line 735  while (!done) Line 2177  while (!done)
2177        goto CONTINUE;        goto CONTINUE;
2178        }        }
2179    
2180      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2181      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2182      returns only limited data. Check that it agrees with the newer one. */      lines. */
2183    
2184        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2185        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2186    
2187        /* Extract the size for possible writing before possibly flipping it,
2188        and remember the store that was got. */
2189    
2190        true_size = ((real_pcre *)re)->size;
2191        regex_gotten_store = first_gotten_store;
2192    
2193        /* Output code size information if requested */
2194    
2195        if (log_store)
2196          fprintf(outfile, "Memory allocation (code space): %d\n",
2197            (int)(first_gotten_store -
2198                  sizeof(real_pcre) -
2199                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2200    
2201        /* If -s or /S was present, study the regex to generate additional info to
2202        help with the matching, unless the pattern has the SS option, which
2203        suppresses the effect of /S (used for a few test patterns where studying is
2204        never sensible). */
2205    
2206        if (do_study || (force_study >= 0 && !no_force_study))
2207          {
2208          if (timeit > 0)
2209            {
2210            register int i;
2211            clock_t time_taken;
2212            clock_t start_time = clock();
2213            for (i = 0; i < timeit; i++)
2214              {
2215              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2216              }
2217            time_taken = clock() - start_time;
2218            if (extra != NULL)
2219              {
2220              PCRE_FREE_STUDY(extra);
2221              }
2222            fprintf(outfile, "  Study time %.4f milliseconds\n",
2223              (((double)time_taken * 1000.0) / (double)timeit) /
2224                (double)CLOCKS_PER_SEC);
2225            }
2226          PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2227          if (error != NULL)
2228            fprintf(outfile, "Failed to study: %s\n", error);
2229          else if (extra != NULL)
2230            {
2231            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2232            if (log_store)
2233              {
2234              size_t jitsize;
2235              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2236              if (jitsize != 0)
2237                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2238              }
2239            }
2240          }
2241    
2242        /* If /K was present, we set up for handling MARK data. */
2243    
2244        if (do_mark)
2245          {
2246          if (extra == NULL)
2247            {
2248            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2249            extra->flags = 0;
2250            }
2251          extra->mark = &markptr;
2252          extra->flags |= PCRE_EXTRA_MARK;
2253          }
2254    
2255        /* If the 'F' option was present, we flip the bytes of all the integer
2256        fields in the regex data block and the study block. This is to make it
2257        possible to test PCRE's handling of byte-flipped patterns, e.g. those
2258        compiled on a different architecture. */
2259    
2260        if (do_flip)
2261          {
2262          real_pcre *rre = (real_pcre *)re;
2263          rre->magic_number =
2264            byteflip(rre->magic_number, sizeof(rre->magic_number));
2265          rre->size = byteflip(rre->size, sizeof(rre->size));
2266          rre->options = byteflip(rre->options, sizeof(rre->options));
2267          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2268          rre->top_bracket =
2269            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2270          rre->top_backref =
2271            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2272          rre->first_char =
2273            (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2274          rre->req_char =
2275            (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2276          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2277            sizeof(rre->name_table_offset));
2278          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2279            sizeof(rre->name_entry_size));
2280          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2281            sizeof(rre->name_count));
2282    
2283          if (extra != NULL)
2284            {
2285            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2286            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2287            rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2288            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2289            }
2290          }
2291    
2292        /* Extract and display information from the compiled data if required. */
2293    
2294        SHOW_INFO:
2295    
2296        if (do_debug)
2297          {
2298          fprintf(outfile, "------------------------------------------------------------------\n");
2299    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2300          if (use_pcre16)
2301            pcre16_printint(re, outfile, debug_lengths);
2302          else
2303            pcre_printint(re, outfile, debug_lengths);
2304    #elif defined SUPPORT_PCRE8
2305          pcre_printint(re, outfile, debug_lengths);
2306    #else
2307          pcre16_printint(re, outfile, debug_lengths);
2308    #endif
2309          }
2310    
2311        /* We already have the options in get_options (see above) */
2312    
2313      if (do_showinfo)      if (do_showinfo)
2314        {        {
2315        unsigned long int get_options;        unsigned long int all_options;
2316    #if !defined NOINFOCHECK
2317        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
2318        int count, backrefmax, first_char, need_char;  #endif
2319        size_t size;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
2320            hascrorlf;
2321        if (do_debug) print_internals(re);        int nameentrysize, namecount;
2322          const pcre_uchar *nametable;
2323    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
2324        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2325        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2326        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2327        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2328        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2329          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2330        old_count = pcre_info(re, &old_options, &old_first_char);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2331        if (count < 0) fprintf(outfile,        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2332          "Error %d from pcre_info()\n", count);        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2333        else        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2334          {        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2335          if (old_count != count) fprintf(outfile,  
2336            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,        /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2337              old_count);        that it gives the same results as the new function. */
2338    
2339          if (old_first_char != first_char) fprintf(outfile,  #if !defined NOINFOCHECK
2340            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",        if (!use_pcre16)
2341              first_char, old_first_char);          {
2342            old_count = pcre_info(re, &old_options, &old_first_char);
2343          if (old_options != (int)get_options) fprintf(outfile,          if (count < 0) fprintf(outfile,
2344            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Error %d from pcre_info()\n", count);
2345              get_options, old_options);          else
2346              {
2347              if (old_count != count) fprintf(outfile,
2348                "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2349                  old_count);
2350    
2351              if (old_first_char != first_char) fprintf(outfile,
2352                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2353                  first_char, old_first_char);
2354    
2355              if (old_options != (int)get_options) fprintf(outfile,
2356                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2357                  get_options, old_options);
2358              }
2359          }          }
2360    #endif
2361    
2362        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
2363          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2364          size, gotten_store);          (int)size, (int)regex_gotten_store);
2365    
2366        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
2367        if (backrefmax > 0)        if (backrefmax > 0)
2368          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
2369    
2370          if (namecount > 0)
2371            {
2372            fprintf(outfile, "Named capturing subpatterns:\n");
2373            while (namecount-- > 0)
2374              {
2375              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
2376                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2377                GET2(nametable, 0));
2378              nametable += nameentrysize;
2379              }
2380            }
2381    
2382          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2383          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2384    
2385          all_options = ((real_pcre *)re)->options;
2386          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2387    
2388        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2389          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2390            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2391            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2392            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2393            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2394              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2395            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2396              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2397              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2398            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2399            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2400            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2401            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2402              ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2403              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2404              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2405              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2406              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2407    
2408          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2409    
2410          switch (get_options & PCRE_NEWLINE_BITS)
2411            {
2412            case PCRE_NEWLINE_CR:
2413            fprintf(outfile, "Forced newline sequence: CR\n");
2414            break;
2415    
2416            case PCRE_NEWLINE_LF:
2417            fprintf(outfile, "Forced newline sequence: LF\n");
2418            break;
2419    
2420            case PCRE_NEWLINE_CRLF:
2421            fprintf(outfile, "Forced newline sequence: CRLF\n");
2422            break;
2423    
2424            case PCRE_NEWLINE_ANYCRLF:
2425            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2426            break;
2427    
2428            case PCRE_NEWLINE_ANY:
2429            fprintf(outfile, "Forced newline sequence: ANY\n");
2430            break;
2431    
2432        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          default:
2433          fprintf(outfile, "Case state changes\n");          break;
2434            }
2435    
2436        if (first_char == -1)        if (first_char == -1)
2437          {          {
2438          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
2439          }          }
2440        else if (first_char < 0)        else if (first_char < 0)
2441          {          {
# Line 805  while (!done) Line 2443  while (!done)
2443          }          }
2444        else        else
2445          {          {
2446          if (isprint(first_char))          const char *caseless =
2447            fprintf(outfile, "First char = \'%c\'\n", first_char);            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2448              "" : " (caseless)";
2449    
2450            if (PRINTOK(first_char))
2451              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2452            else
2453              {
2454              fprintf(outfile, "First char = ");
2455              pchar(first_char, outfile);
2456              fprintf(outfile, "%s\n", caseless);
2457              }
2458            }
2459    
2460          if (need_char < 0)
2461            {
2462            fprintf(outfile, "No need char\n");
2463            }
2464          else
2465            {
2466            const char *caseless =
2467              ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2468              "" : " (caseless)";
2469    
2470            if (PRINTOK(need_char))
2471              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2472          else          else
2473            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2474          }          }
2475    
2476        if (need_char < 0)        /* Don't output study size; at present it is in any case a fixed
2477          {        value, but it varies, depending on the computer architecture, and
2478          fprintf(outfile, "No need char\n");        so messes up the test suite. (And with the /F option, it might be
2479          }        flipped.) If study was forced by an external -s, don't show this
2480        else        information unless -i or -d was also present. This means that, except
2481          {        when auto-callouts are involved, the output from runs with and without
2482          if (isprint(need_char))        -s should be identical. */
2483            fprintf(outfile, "Need char = \'%c\'\n", need_char);  
2484          else        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2485            fprintf(outfile, "Need char = %d\n", need_char);          {
2486            if (extra == NULL)
2487              fprintf(outfile, "Study returned NULL\n");
2488            else
2489              {
2490              pcre_uint8 *start_bits = NULL;
2491              int minlength;
2492    
2493              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2494              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2495    
2496              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2497              if (start_bits == NULL)
2498                fprintf(outfile, "No set of starting bytes\n");
2499              else
2500                {
2501                int i;
2502                int c = 24;
2503                fprintf(outfile, "Starting byte set: ");
2504                for (i = 0; i < 256; i++)
2505                  {
2506                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
2507                    {
2508                    if (c > 75)
2509                      {
2510                      fprintf(outfile, "\n  ");
2511                      c = 2;
2512                      }
2513                    if (PRINTOK(i) && i != ' ')
2514                      {
2515                      fprintf(outfile, "%c ", i);
2516                      c += 2;
2517                      }
2518                    else
2519                      {
2520                      fprintf(outfile, "\\x%02x ", i);
2521                      c += 5;
2522                      }
2523                    }
2524                  }
2525                fprintf(outfile, "\n");
2526                }
2527              }
2528    
2529            /* Show this only if the JIT was set by /S, not by -s. */
2530    
2531            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2532              {
2533              int jit;
2534              new_info(re, extra, PCRE_INFO_JIT, &jit);
2535              if (jit)
2536                fprintf(outfile, "JIT study was successful\n");
2537              else
2538    #ifdef SUPPORT_JIT
2539                fprintf(outfile, "JIT study was not successful\n");
2540    #else
2541                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2542    #endif
2543              }
2544          }          }
2545        }        }
2546    
2547      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
2548      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
2549        the study length, in big-endian order. */
2550    
2551      if (do_study)      if (to_file != NULL)
2552        {        {
2553        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
2554          if (f == NULL)
2555          {          {
2556          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
2557          }          }
2558          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       else if (do_showinfo)  
2559          {          {
2560          uschar *start_bits = NULL;          pcre_uint8 sbuf[8];
2561          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2562          if (start_bits == NULL)          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2563            fprintf(outfile, "No starting character set\n");          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2564            sbuf[3] = (pcre_uint8)((true_size) & 255);
2565    
2566            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2567            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2568            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2569            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2570    
2571            if (fwrite(sbuf, 1, 8, f) < 8 ||
2572                fwrite(re, 1, true_size, f) < true_size)
2573              {
2574              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2575              }
2576          else          else
2577            {            {
2578            int i;            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2579            int c = 24;  
2580            fprintf(outfile, "Starting character set: ");            /* If there is study data, write it. */
2581            for (i = 0; i < 256; i++)  
2582              if (extra != NULL)
2583              {              {
2584              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
2585                    true_study_size)
2586                {                {
2587                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
2588                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
2589                }                }
2590                else fprintf(outfile, "Study data written to %s\n", to_file);
2591              }              }
           fprintf(outfile, "\n");  
2592            }            }
2593            fclose(f);
2594            }
2595    
2596          new_free(re);
2597          if (extra != NULL)
2598            {
2599            PCRE_FREE_STUDY(extra);
2600            }
2601          if (locale_set)
2602            {
2603            new_free((void *)tables);
2604            setlocale(LC_CTYPE, "C");
2605            locale_set = 0;
2606          }          }
2607          continue;  /* With next regex */
2608        }        }
2609      }      }        /* End of non-POSIX compile */
2610    
2611    /* Read data lines and test them */    /* Read data lines and test them */
2612    
2613    for (;;)    for (;;)
2614      {      {
2615      unsigned char *q;      pcre_uint8 *q;
2616      unsigned char *bptr = dbuffer;      pcre_uint8 *bptr;
2617        int *use_offsets = offsets;
2618      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2619        int callout_data = 0;
2620        int callout_data_set = 0;
2621      int count, c;      int count, c;
2622      int copystrings = 0;      int copystrings = 0;
2623        int find_match_limit = default_find_match_limit;
2624      int getstrings = 0;      int getstrings = 0;
2625      int getlist = 0;      int getlist = 0;
2626      int gmatched = 0;      int gmatched = 0;
2627      int start_offset = 0;      int start_offset = 0;
2628        int start_offset_sign = 1;
2629      int g_notempty = 0;      int g_notempty = 0;
2630        int use_dfa = 0;
2631    
2632      options = 0;      options = 0;
2633    
2634      if (infile == stdin) printf("data> ");      *copynames = 0;
2635      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
2636    
2637        copynamesptr = copynames;
2638        getnamesptr = getnames;
2639    
2640        pcre_callout = callout;
2641        first_callout = 1;
2642        last_callout_mark = NULL;
2643        callout_extra = 0;
2644        callout_count = 0;
2645        callout_fail_count = 999999;
2646        callout_fail_id = -1;
2647        show_malloc = 0;
2648    
2649        if (extra != NULL) extra->flags &=
2650          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2651    
2652        len = 0;
2653        for (;;)
2654        {        {
2655        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2656        goto CONTINUE;          {
2657            if (len > 0)    /* Reached EOF without hitting a newline */
2658              {
2659              fprintf(outfile, "\n");
2660              break;
2661              }
2662            done = 1;
2663            goto CONTINUE;
2664            }
2665          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2666          len = (int)strlen((char *)buffer);
2667          if (buffer[len-1] == '\n') break;
2668        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2669    
     len = (int)strlen((char *)buffer);  
2670      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2671      buffer[len] = 0;      buffer[len] = 0;
2672      if (len == 0) break;      if (len == 0) break;
# Line 920  while (!done) Line 2674  while (!done)
2674      p = buffer;      p = buffer;
2675      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2676    
2677      q = dbuffer;      bptr = q = dbuffer;
2678      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2679        {        {
2680        int i = 0;        int i = 0;
2681        int n = 0;        int n = 0;
2682    
2683        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
2684          {          {
2685          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 941  while (!done) Line 2696  while (!done)
2696          c -= '0';          c -= '0';
2697          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2698            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2699    
2700    #if !defined NOUTF8
2701            if (use_utf && c > 255)
2702              {
2703              pcre_uint8 buff8[8];
2704              int ii, utn;
2705              utn = ord2utf8(c, buff8);
2706              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2707              c = buff8[ii];   /* Last byte */
2708              }
2709    #endif
2710          break;          break;
2711    
2712          case 'x':          case 'x':
2713    
2714          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2715    
2716    #if !defined NOUTF8
2717          if (*p == '{')          if (*p == '{')
2718            {            {
2719            unsigned char *pt = p;            pcre_uint8 *pt = p;
2720            c = 0;            c = 0;
2721            while (isxdigit(*(++pt)))  
2722              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2723              when isxdigit() is a macro that refers to its argument more than
2724              once. This is banned by the C Standard, but apparently happens in at
2725              least one MacOS environment. */
2726    
2727              for (pt++; isxdigit(*pt); pt++)
2728                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2729            if (*pt == '}')            if (*pt == '}')
2730              {              {
2731              unsigned char buffer[8];              pcre_uint8 buff8[8];
2732              int ii, utn;              int ii, utn;
2733              utn = ord2utf8(c, buffer);              if (use_utf)
2734              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];                {
2735              c = buffer[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2736                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2737                  c = buff8[ii];   /* Last byte */
2738                  }
2739                else
2740                 {
2741                 if (c > 255)
2742                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2743                     "UTF-8 mode is not enabled.\n"
2744                     "** Truncation will probably give the wrong result.\n", c);
2745                 }
2746              p = pt + 1;              p = pt + 1;
2747              break;              break;
2748              }              }
2749            /* Not correct form; fall through */            /* Not correct form; fall through */
2750            }            }
2751    #endif
2752    
2753          /* Ordinary \x */          /* Ordinary \x */
2754    
2755          c = 0;          c = 0;
2756          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2757            {            {
2758            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2759            p++;            p++;
2760            }            }
2761          break;          break;
2762    
2763          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2764          p--;          p--;
2765          continue;          continue;
2766    
2767            case '>':
2768            if (*p == '-')
2769              {
2770              start_offset_sign = -1;
2771              p++;
2772              }
2773            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2774            start_offset *= start_offset_sign;
2775            continue;
2776    
2777          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2778          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2779          continue;          continue;
# Line 989  while (!done) Line 2783  while (!done)
2783          continue;          continue;
2784    
2785          case 'C':          case 'C':
2786          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
2787          copystrings |= 1 << n;            {
2788              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2789              copystrings |= 1 << n;
2790              }
2791            else if (isalnum(*p))
2792              {
2793              pcre_uchar *npp = copynamesptr;
2794              while (isalnum(*p)) *npp++ = *p++;
2795              *npp++ = 0;
2796              *npp = 0;
2797              n = pcre_get_stringnumber(re, (char *)copynamesptr);
2798              if (n < 0)
2799                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2800              copynamesptr = npp;
2801              }
2802            else if (*p == '+')
2803              {
2804              callout_extra = 1;
2805              p++;
2806              }
2807            else if (*p == '-')
2808              {
2809              pcre_callout = NULL;
2810              p++;
2811              }
2812            else if (*p == '!')
2813              {
2814              callout_fail_id = 0;
2815              p++;
2816              while(isdigit(*p))
2817                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2818              callout_fail_count = 0;
2819              if (*p == '!')
2820                {
2821                p++;
2822                while(isdigit(*p))
2823                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2824                }
2825              }
2826            else if (*p == '*')
2827              {
2828              int sign = 1;
2829              callout_data = 0;
2830              if (*(++p) == '-') { sign = -1; p++; }
2831              while(isdigit(*p))
2832                callout_data = callout_data * 10 + *p++ - '0';
2833              callout_data *= sign;
2834              callout_data_set = 1;
2835              }
2836            continue;
2837    
2838    #if !defined NODFA
2839            case 'D':
2840    #if !defined NOPOSIX
2841            if (posix || do_posix)
2842              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2843            else
2844    #endif
2845              use_dfa = 1;
2846          continue;          continue;
2847    #endif
2848    
2849    #if !defined NODFA
2850            case 'F':
2851            options |= PCRE_DFA_SHORTEST;
2852            continue;
2853    #endif
2854    
2855          case 'G':          case 'G':
2856            if (isdigit(*p))
2857              {
2858              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2859              getstrings |= 1 << n;
2860              }
2861            else if (isalnum(*p))
2862              {
2863              pcre_uchar *npp = getnamesptr;
2864              while (isalnum(*p)) *npp++ = *p++;
2865              *npp++ = 0;
2866              *npp = 0;
2867              n = pcre_get_stringnumber(re, (char *)getnamesptr);
2868              if (n < 0)
2869                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2870              getnamesptr = npp;
2871              }
2872            continue;
2873    
2874            case 'J':
2875          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
2876          getstrings |= 1 << n;          if (extra != NULL
2877                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2878                && extra->executable_jit != NULL)
2879              {
2880              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2881              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2882              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2883              }
2884          continue;          continue;
2885    
2886          case 'L':          case 'L':
2887          getlist = 1;          getlist = 1;
2888          continue;          continue;
2889    
2890            case 'M':
2891            find_match_limit = 1;
2892            continue;
2893    
2894          case 'N':          case 'N':
2895          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2896              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2897            else
2898              options |= PCRE_NOTEMPTY;
2899          continue;          continue;
2900    
2901          case 'O':          case 'O':
2902          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
2903          if (n > size_offsets_max)          if (n > size_offsets_max)
2904            {            {
   
 if (offsets != NULL)  
   
           free(offsets);  
2905            size_offsets_max = n;            size_offsets_max = n;
2906            offsets = malloc(size_offsets_max * sizeof(int));            free(offsets);
2907              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2908            if (offsets == NULL)            if (offsets == NULL)
2909              {              {
2910              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2911                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2912              return 1;              yield = 1;
2913                goto EXIT;
2914              }              }
2915            }            }
2916          use_size_offsets = n;          use_size_offsets = n;
2917            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2918            continue;
2919    
2920  if (n == 0)          case 'P':
2921    {          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2922    free(offsets);            PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2923    offsets = NULL;          continue;
2924    size_offsets_max = 0;  
2925    }          case 'Q':
2926            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2927            if (extra == NULL)
2928              {
2929              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2930              extra->flags = 0;
2931              }
2932            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2933            extra->match_limit_recursion = n;
2934            continue;
2935    
2936            case 'q':
2937            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2938            if (extra == NULL)
2939              {
2940              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2941              extra->flags = 0;
2942              }
2943            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2944            extra->match_limit = n;
2945            continue;
2946    
2947    #if !defined NODFA
2948            case 'R':
2949            options |= PCRE_DFA_RESTART;
2950            continue;
2951    #endif
2952    
2953            case 'S':
2954            show_malloc = 1;
2955            continue;
2956    
2957            case 'Y':
2958            options |= PCRE_NO_START_OPTIMIZE;
2959          continue;          continue;
2960    
2961          case 'Z':          case 'Z':
2962          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2963          continue;          continue;
2964    
2965            case '?':
2966            options |= PCRE_NO_UTF8_CHECK;
2967            continue;
2968    
2969            case '<':
2970              {
2971              int x = check_newline(p, outfile);
2972              if (x == 0) goto NEXT_DATA;
2973              options |= x;
2974              while (*p++ != '>');
2975              }
2976            continue;
2977          }          }
2978        *q++ = c;        *q++ = c;
2979        }        }
2980      *q = 0;      *q = 0;
2981      len = q - dbuffer;      len = (int)(q - dbuffer);
2982    
2983        /* Move the data to the end of the buffer so that a read over the end of
2984        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2985        we are using the POSIX interface, we must include the terminating zero. */
2986    
2987    #if !defined NOPOSIX
2988        if (posix || do_posix)
2989          {
2990          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2991          bptr += buffer_size - len - 1;
2992          }
2993        else
2994    #endif
2995          {
2996          memmove(bptr + buffer_size - len, bptr, len);
2997          bptr += buffer_size - len;
2998          }
2999    
3000        if ((all_use_dfa || use_dfa) && find_match_limit)
3001          {
3002          printf("**Match limit not relevant for DFA matching: ignored\n");
3003          find_match_limit = 0;
3004          }
3005    
3006      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
3007      support timing. */      support timing or playing with the match limit or callout data. */
3008    
3009  #if !defined NOPOSIX  #if !defined NOPOSIX
3010      if (posix || do_posix)      if (posix || do_posix)
3011        {        {
3012        int rc;        int rc;
3013        int eflags = 0;        int eflags = 0;
3014        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
3015          if (use_size_offsets > 0)
3016            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3017        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3018        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3019          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3020    
3021        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3022    
3023        if (rc != 0)        if (rc != 0)
3024          {          {
3025          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3026          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3027          }          }
3028          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3029                  != 0)
3030            {
3031            fprintf(outfile, "Matched with REG_NOSUB\n");
3032            }
3033        else        else
3034          {          {
3035          size_t i;          size_t i;
3036          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
3037            {            {
3038            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3039              {              {
3040              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3041              pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3042                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3043              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3044              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3045                {                {
3046                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3047                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3048                    outfile);
3049                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3050                }                }
3051              }              }
3052            }            }
3053          }          }
3054        free(pmatch);        free(pmatch);
3055          goto NEXT_DATA;
3056        }        }
3057    
3058    #endif  /* !defined NOPOSIX */
3059    
3060      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3061    
3062      else  #ifdef SUPPORT_PCRE16
3063  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3064          {
3065          len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3066          if (len < 0)
3067            {
3068            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3069              "converted to UTF-16\n");
3070            goto NEXT_DATA;
3071            }
3072          bptr = (pcre_uint8 *)buffer16;
3073          }
3074    #endif
3075    
3076      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3077        {        {
3078        if (timeit)        markptr = NULL;
3079    
3080          if (timeitm > 0)
3081          {          {
3082          register int i;          register int i;
3083          clock_t time_taken;          clock_t time_taken;
3084          clock_t start_time = clock();          clock_t start_time = clock();
3085          for (i = 0; i < LOOPREPEAT; i++)  
3086            count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
3087              start_offset, options | g_notempty, offsets, use_size_offsets);          if (all_use_dfa || use_dfa)
3088              {
3089              int workspace[1000];
3090              for (i = 0; i < timeitm; i++)
3091                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3092                  options | g_notempty, use_offsets, use_size_offsets, workspace,
3093                  sizeof(workspace)/sizeof(int));
3094              }
3095            else
3096    #endif
3097    
3098            for (i = 0; i < timeitm; i++)
3099              {
3100              PCRE_EXEC(count, re, extra, bptr, len,
3101                start_offset, options | g_notempty, use_offsets, use_size_offsets);
3102              }
3103          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3104          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3105            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
3106            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
3107            }
3108    
3109          /* If find_match_limit is set, we want to do repeated matches with
3110          varying limits in order to find the minimum value for the match limit and
3111          for the recursion limit. The match limits are relevant only to the normal
3112          running of pcre_exec(), so disable the JIT optimization. This makes it
3113          possible to run the same set of tests with and without JIT externally
3114          requested. */
3115    
3116          if (find_match_limit)
3117            {
3118            if (extra == NULL)
3119              {
3120              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3121              extra->flags = 0;
3122              }
3123            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3124    
3125            (void)check_match_limit(re, extra, bptr, len, start_offset,
3126              options|g_notempty, use_offsets, use_size_offsets,
3127              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3128              PCRE_ERROR_MATCHLIMIT, "match()");
3129    
3130            count = check_match_limit(re, extra, bptr, len, start_offset,
3131              options|g_notempty, use_offsets, use_size_offsets,
3132              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3133              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3134          }          }
3135    
3136        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
3137          start_offset, options | g_notempty, offsets, use_size_offsets);  
3138          else if (callout_data_set)
3139            {
3140            if (extra == NULL)
3141              {
3142              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3143              extra->flags = 0;
3144              }
3145            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3146            extra->callout_data = &callout_data;
3147            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3148              options | g_notempty, use_offsets, use_size_offsets);
3149            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3150            }
3151    
3152          /* The normal case is just to do the match once, with the default
3153          value of match_limit. */
3154    
3155    #if !defined NODFA
3156          else if (all_use_dfa || use_dfa)
3157            {
3158            int workspace[1000];
3159            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3160              options | g_notempty, use_offsets, use_size_offsets, workspace,
3161              sizeof(workspace)/sizeof(int));
3162            if (count == 0)
3163              {
3164              fprintf(outfile, "Matched, but too many subsidiary matches\n");
3165              count = use_size_offsets/2;
3166              }
3167            }
3168    #endif
3169    
3170        if (count == 0)        else
3171          {          {
3172          fprintf(outfile, "Matched, but too many substrings\n");          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3173          count = use_size_offsets/3;            options | g_notempty, use_offsets, use_size_offsets);
3174            if (count == 0)
3175              {
3176              fprintf(outfile, "Matched, but too many substrings\n");
3177              count = use_size_offsets/3;
3178              }
3179          }          }
3180    
3181        /* Matched */        /* Matched */
3182    
3183        if (count >= 0)        if (count >= 0)
3184          {          {
3185          int i;          int i, maxcount;
3186    
3187    #if !defined NODFA
3188            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3189    #endif
3190              maxcount = use_size_offsets/3;
3191    
3192            /* This is a check against a lunatic return value. */
3193    
3194            if (count > maxcount)
3195              {
3196              fprintf(outfile,
3197                "** PCRE error: returned count %d is too big for offset size %d\n",
3198                count, use_size_offsets);
3199              count = use_size_offsets/3;
3200              if (do_g || do_G)
3201                {
3202                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3203                do_g = do_G = FALSE;        /* Break g/G loop */
3204                }
3205              }
3206    
3207            /* do_allcaps requests showing of all captures in the pattern, to check
3208            unset ones at the end. */
3209    
3210            if (do_allcaps)
3211              {
3212              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3213              count++;   /* Allow for full match */
3214              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3215              }
3216    
3217            /* Output the captured substrings */
3218    
3219          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
3220            {            {
3221            if (offsets[i] < 0)            if (use_offsets[i] < 0)
3222                {
3223                if (use_offsets[i] != -1)
3224                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3225                    use_offsets[i], i);
3226                if (use_offsets[i+1] != -1)
3227                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3228                    use_offsets[i+1], i+1);
3229              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3230                }
3231            else            else
3232              {              {
3233              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3234              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);              PCHARSV(bptr + use_offsets[i],
3235                  use_offsets[i+1] - use_offsets[i], outfile);
3236              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3237              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3238                {                {