/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 57 by nigel, Sat Feb 24 21:39:50 2007 UTC code/branches/pcre16/pcretest.c revision 805 by ph10, Wed Dec 14 16:49:20 2011 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84    #ifdef __BORLANDC__
85    #define _setmode(handle, mode) setmode(handle, mode)
86    #endif
87    
88    /* Not Windows */
89    
90    #else
91    #include <sys/time.h>          /* These two includes are needed */
92    #include <sys/resource.h>      /* for setrlimit(). */
93    #define INPUT_MODE   "rb"
94    #define OUTPUT_MODE  "wb"
95    #endif
96    
97    
98    /* We have to include pcre_internal.h because we need the internal info for
99    displaying the results of pcre_study() and we also need to know about the
100    internal macros, structures, and other internal data values; pcretest has
101    "inside information" compared to a program that strictly follows the PCRE API.
102    
103    Although pcre_internal.h does itself include pcre.h, we explicitly include it
104    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105    appropriately for an application, not for building PCRE. */
106    
107    #include "pcre.h"
108    #include "pcre_internal.h"
109    
110    /* The pcre_printint() function, which prints the internal form of a compiled
111    regex, is held in a separate file so that (a) it can be compiled in either
112    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
113    when that is compiled in debug mode. */
114    
115  /* Use the internal info for displaying the results of pcre_study(). */  #ifdef SUPPORT_PCRE8
116    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
117    #endif
118    #ifdef SUPPORT_PCRE16
119    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
120    #endif
121    
122    /* We need access to some of the data tables that PCRE uses. So as not to have
123    to keep two copies, we include the source file here, changing the names of the
124    external symbols to prevent clashes. */
125    
126    #define _pcre_ucp_gentype      ucp_gentype
127    #define _pcre_ucp_typerange    ucp_typerange
128    #define _pcre_utf8_table1      utf8_table1
129    #define _pcre_utf8_table1_size utf8_table1_size
130    #define _pcre_utf8_table2      utf8_table2
131    #define _pcre_utf8_table3      utf8_table3
132    #define _pcre_utf8_table4      utf8_table4
133    #define _pcre_utt              utt
134    #define _pcre_utt_size         utt_size
135    #define _pcre_utt_names        utt_names
136    #define _pcre_OP_lengths       OP_lengths
137    
138    #include "pcre_tables.c"
139    
140    /* The definition of the macro PRINTABLE, which determines whether to print an
141    output character as-is or as a hex value when showing compiled patterns, is
142    the same as in the printint.src file. We use it here in cases when the locale
143    has not been explicitly changed, so as to get consistent output from systems
144    that differ in their output from isprint() even in the "C" locale. */
145    
146    #ifdef EBCDIC
147    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
148    #else
149    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
150    #endif
151    
152  #include "internal.h"  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
153    
154  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
155  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 159  Makefile. */
159  #include "pcreposix.h"  #include "pcreposix.h"
160  #endif  #endif
161    
162    /* It is also possible, for the benefit of the version currently imported into
163    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
164    interface to the DFA matcher (NODFA), and without the doublecheck of the old
165    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
166    UTF8 support if PCRE is built without it. */
167    
168    #ifndef SUPPORT_UTF8
169    #ifndef NOUTF8
170    #define NOUTF8
171    #endif
172    #endif
173    
174    
175    /* Other parameters */
176    
177  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
178  #ifdef CLK_TCK  #ifdef CLK_TCK
179  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 182  Makefile. */
182  #endif  #endif
183  #endif  #endif
184    
185  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
186    
187    #define LOOPREPEAT 500000
188    
189    /* Static variables */
190    
191  static FILE *outfile;  static FILE *outfile;
192  static int log_store = 0;  static int log_store = 0;
193    static int callout_count;
194    static int callout_extra;
195    static int callout_fail_count;
196    static int callout_fail_id;
197    static int debug_lengths;
198    static int first_callout;
199    static int locale_set = 0;
200    static int show_malloc;
201    static int use_utf8;
202  static size_t gotten_store;  static size_t gotten_store;
203    static size_t first_gotten_store = 0;
204    static const unsigned char *last_callout_mark = NULL;
205    
206    static int (*fullinfo)(const pcre *, const pcre_extra *, int, void *);
207    
208    /* The buffers grow automatically if very long input lines are encountered. */
209    
210    static int buffer_size = 50000;
211    static pcre_uint8 *buffer = NULL;
212    static pcre_uint8 *dbuffer = NULL;
213    static pcre_uint8 *pbuffer = NULL;
214    
215    #ifdef SUPPORT_PCRE16
216    static int buffer16_size = 0;
217    static pcre_uint16 *buffer16 = NULL;
218    #endif
219    
220    /* Textual explanations for runtime error codes */
221    
222  static int utf8_table1[] = {  static const char *errtexts[] = {
223    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};    NULL,  /* 0 is no error */
224      NULL,  /* NOMATCH is handled specially */
225      "NULL argument passed",
226      "bad option value",
227      "magic number missing",
228      "unknown opcode - pattern overwritten?",
229      "no more memory",
230      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
231      "match limit exceeded",
232      "callout error code",
233      NULL,  /* BADUTF8 is handled specially */
234      "bad UTF-8 offset",
235      NULL,  /* PARTIAL is handled specially */
236      "not used - internal error",
237      "internal error - pattern overwritten?",
238      "bad count value",
239      "item unsupported for DFA matching",
240      "backreference condition or recursion test not supported for DFA matching",
241      "match limit not supported for DFA matching",
242      "workspace size exceeded in DFA matching",
243      "too much recursion for DFA matching",
244      "recursion limit exceeded",
245      "not used - internal error",
246      "invalid combination of newline options",
247      "bad offset value",
248      NULL,  /* SHORTUTF8 is handled specially */
249      "nested recursion at the same subject position",
250      "JIT stack limit reached",
251      "pattern compiled in wrong mode (8-bit/16-bit error)"
252    };
253    
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
254    
255  static int utf8_table3[] = {  /*************************************************
256    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  *         Alternate character tables             *
257    *************************************************/
258    
259    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
260    using the default tables of the library. However, the T option can be used to
261    select alternate sets of tables, for different kinds of testing. Note also that
262    the L (locale) option also adjusts the tables. */
263    
264    /* This is the set of tables distributed as default with PCRE. It recognizes
265    only ASCII characters. */
266    
267    static const unsigned char tables0[] = {
268    
269    /* This table is a lower casing table. */
270    
271        0,  1,  2,  3,  4,  5,  6,  7,
272        8,  9, 10, 11, 12, 13, 14, 15,
273       16, 17, 18, 19, 20, 21, 22, 23,
274       24, 25, 26, 27, 28, 29, 30, 31,
275       32, 33, 34, 35, 36, 37, 38, 39,
276       40, 41, 42, 43, 44, 45, 46, 47,
277       48, 49, 50, 51, 52, 53, 54, 55,
278       56, 57, 58, 59, 60, 61, 62, 63,
279       64, 97, 98, 99,100,101,102,103,
280      104,105,106,107,108,109,110,111,
281      112,113,114,115,116,117,118,119,
282      120,121,122, 91, 92, 93, 94, 95,
283       96, 97, 98, 99,100,101,102,103,
284      104,105,106,107,108,109,110,111,
285      112,113,114,115,116,117,118,119,
286      120,121,122,123,124,125,126,127,
287      128,129,130,131,132,133,134,135,
288      136,137,138,139,140,141,142,143,
289      144,145,146,147,148,149,150,151,
290      152,153,154,155,156,157,158,159,
291      160,161,162,163,164,165,166,167,
292      168,169,170,171,172,173,174,175,
293      176,177,178,179,180,181,182,183,
294      184,185,186,187,188,189,190,191,
295      192,193,194,195,196,197,198,199,
296      200,201,202,203,204,205,206,207,
297      208,209,210,211,212,213,214,215,
298      216,217,218,219,220,221,222,223,
299      224,225,226,227,228,229,230,231,
300      232,233,234,235,236,237,238,239,
301      240,241,242,243,244,245,246,247,
302      248,249,250,251,252,253,254,255,
303    
304    /* This table is a case flipping table. */
305    
306        0,  1,  2,  3,  4,  5,  6,  7,
307        8,  9, 10, 11, 12, 13, 14, 15,
308       16, 17, 18, 19, 20, 21, 22, 23,
309       24, 25, 26, 27, 28, 29, 30, 31,
310       32, 33, 34, 35, 36, 37, 38, 39,
311       40, 41, 42, 43, 44, 45, 46, 47,
312       48, 49, 50, 51, 52, 53, 54, 55,
313       56, 57, 58, 59, 60, 61, 62, 63,
314       64, 97, 98, 99,100,101,102,103,
315      104,105,106,107,108,109,110,111,
316      112,113,114,115,116,117,118,119,
317      120,121,122, 91, 92, 93, 94, 95,
318       96, 65, 66, 67, 68, 69, 70, 71,
319       72, 73, 74, 75, 76, 77, 78, 79,
320       80, 81, 82, 83, 84, 85, 86, 87,
321       88, 89, 90,123,124,125,126,127,
322      128,129,130,131,132,133,134,135,
323      136,137,138,139,140,141,142,143,
324      144,145,146,147,148,149,150,151,
325      152,153,154,155,156,157,158,159,
326      160,161,162,163,164,165,166,167,
327      168,169,170,171,172,173,174,175,
328      176,177,178,179,180,181,182,183,
329      184,185,186,187,188,189,190,191,
330      192,193,194,195,196,197,198,199,
331      200,201,202,203,204,205,206,207,
332      208,209,210,211,212,213,214,215,
333      216,217,218,219,220,221,222,223,
334      224,225,226,227,228,229,230,231,
335      232,233,234,235,236,237,238,239,
336      240,241,242,243,244,245,246,247,
337      248,249,250,251,252,253,254,255,
338    
339    /* This table contains bit maps for various character classes. Each map is 32
340    bytes long and the bits run from the least significant end of each byte. The
341    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
342    graph, print, punct, and cntrl. Other classes are built from combinations. */
343    
344      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
345      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348    
349      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
350      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
351      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
352      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
353    
354      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
355      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
356      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358    
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363    
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368    
369      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
370      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
373    
374      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
375      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
376      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
377      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
378    
379      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
380      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
381      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
382      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
383    
384      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
385      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
386      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
387      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
388    
389      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
390      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
391      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
392      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
393    
394    /* This table identifies various classes of character by individual bits:
395      0x01   white space character
396      0x02   letter
397      0x04   decimal digit
398      0x08   hexadecimal digit
399      0x10   alphanumeric or '_'
400      0x80   regular expression metacharacter or binary zero
401    */
402    
403      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
404      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
405      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
406      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
407      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
408      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
409      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
410      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
411      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
412      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
413      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
414      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
415      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
416      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
417      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
418      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
419      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
420      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
421      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
422      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
423      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
424      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
425      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
426      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
427      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
428      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
429      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
430      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
431      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
432      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
433      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
434      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
435    
436    /* This is a set of tables that came originally from a Windows user. It seems to
437    be at least an approximation of ISO 8859. In particular, there are characters
438    greater than 128 that are marked as spaces, letters, etc. */
439    
440    static const unsigned char tables1[] = {
441    0,1,2,3,4,5,6,7,
442    8,9,10,11,12,13,14,15,
443    16,17,18,19,20,21,22,23,
444    24,25,26,27,28,29,30,31,
445    32,33,34,35,36,37,38,39,
446    40,41,42,43,44,45,46,47,
447    48,49,50,51,52,53,54,55,
448    56,57,58,59,60,61,62,63,
449    64,97,98,99,100,101,102,103,
450    104,105,106,107,108,109,110,111,
451    112,113,114,115,116,117,118,119,
452    120,121,122,91,92,93,94,95,
453    96,97,98,99,100,101,102,103,
454    104,105,106,107,108,109,110,111,
455    112,113,114,115,116,117,118,119,
456    120,121,122,123,124,125,126,127,
457    128,129,130,131,132,133,134,135,
458    136,137,138,139,140,141,142,143,
459    144,145,146,147,148,149,150,151,
460    152,153,154,155,156,157,158,159,
461    160,161,162,163,164,165,166,167,
462    168,169,170,171,172,173,174,175,
463    176,177,178,179,180,181,182,183,
464    184,185,186,187,188,189,190,191,
465    224,225,226,227,228,229,230,231,
466    232,233,234,235,236,237,238,239,
467    240,241,242,243,244,245,246,215,
468    248,249,250,251,252,253,254,223,
469    224,225,226,227,228,229,230,231,
470    232,233,234,235,236,237,238,239,
471    240,241,242,243,244,245,246,247,
472    248,249,250,251,252,253,254,255,
473    0,1,2,3,4,5,6,7,
474    8,9,10,11,12,13,14,15,
475    16,17,18,19,20,21,22,23,
476    24,25,26,27,28,29,30,31,
477    32,33,34,35,36,37,38,39,
478    40,41,42,43,44,45,46,47,
479    48,49,50,51,52,53,54,55,
480    56,57,58,59,60,61,62,63,
481    64,97,98,99,100,101,102,103,
482    104,105,106,107,108,109,110,111,
483    112,113,114,115,116,117,118,119,
484    120,121,122,91,92,93,94,95,
485    96,65,66,67,68,69,70,71,
486    72,73,74,75,76,77,78,79,
487    80,81,82,83,84,85,86,87,
488    88,89,90,123,124,125,126,127,
489    128,129,130,131,132,133,134,135,
490    136,137,138,139,140,141,142,143,
491    144,145,146,147,148,149,150,151,
492    152,153,154,155,156,157,158,159,
493    160,161,162,163,164,165,166,167,
494    168,169,170,171,172,173,174,175,
495    176,177,178,179,180,181,182,183,
496    184,185,186,187,188,189,190,191,
497    224,225,226,227,228,229,230,231,
498    232,233,234,235,236,237,238,239,
499    240,241,242,243,244,245,246,215,
500    248,249,250,251,252,253,254,223,
501    192,193,194,195,196,197,198,199,
502    200,201,202,203,204,205,206,207,
503    208,209,210,211,212,213,214,247,
504    216,217,218,219,220,221,222,255,
505    0,62,0,0,1,0,0,0,
506    0,0,0,0,0,0,0,0,
507    32,0,0,0,1,0,0,0,
508    0,0,0,0,0,0,0,0,
509    0,0,0,0,0,0,255,3,
510    126,0,0,0,126,0,0,0,
511    0,0,0,0,0,0,0,0,
512    0,0,0,0,0,0,0,0,
513    0,0,0,0,0,0,255,3,
514    0,0,0,0,0,0,0,0,
515    0,0,0,0,0,0,12,2,
516    0,0,0,0,0,0,0,0,
517    0,0,0,0,0,0,0,0,
518    254,255,255,7,0,0,0,0,
519    0,0,0,0,0,0,0,0,
520    255,255,127,127,0,0,0,0,
521    0,0,0,0,0,0,0,0,
522    0,0,0,0,254,255,255,7,
523    0,0,0,0,0,4,32,4,
524    0,0,0,128,255,255,127,255,
525    0,0,0,0,0,0,255,3,
526    254,255,255,135,254,255,255,7,
527    0,0,0,0,0,4,44,6,
528    255,255,127,255,255,255,127,255,
529    0,0,0,0,254,255,255,255,
530    255,255,255,255,255,255,255,127,
531    0,0,0,0,254,255,255,255,
532    255,255,255,255,255,255,255,255,
533    0,2,0,0,255,255,255,255,
534    255,255,255,255,255,255,255,127,
535    0,0,0,0,255,255,255,255,
536    255,255,255,255,255,255,255,255,
537    0,0,0,0,254,255,0,252,
538    1,0,0,248,1,0,0,120,
539    0,0,0,0,254,255,255,255,
540    0,0,128,0,0,0,128,0,
541    255,255,255,255,0,0,0,0,
542    0,0,0,0,0,0,0,128,
543    255,255,255,255,0,0,0,0,
544    0,0,0,0,0,0,0,0,
545    128,0,0,0,0,0,0,0,
546    0,1,1,0,1,1,0,0,
547    0,0,0,0,0,0,0,0,
548    0,0,0,0,0,0,0,0,
549    1,0,0,0,128,0,0,0,
550    128,128,128,128,0,0,128,0,
551    28,28,28,28,28,28,28,28,
552    28,28,0,0,0,0,0,128,
553    0,26,26,26,26,26,26,18,
554    18,18,18,18,18,18,18,18,
555    18,18,18,18,18,18,18,18,
556    18,18,18,128,128,0,128,16,
557    0,26,26,26,26,26,26,18,
558    18,18,18,18,18,18,18,18,
559    18,18,18,18,18,18,18,18,
560    18,18,18,128,128,0,0,0,
561    0,0,0,0,0,1,0,0,
562    0,0,0,0,0,0,0,0,
563    0,0,0,0,0,0,0,0,
564    0,0,0,0,0,0,0,0,
565    1,0,0,0,0,0,0,0,
566    0,0,18,0,0,0,0,0,
567    0,0,20,20,0,18,0,0,
568    0,20,18,0,0,0,0,0,
569    18,18,18,18,18,18,18,18,
570    18,18,18,18,18,18,18,18,
571    18,18,18,18,18,18,18,0,
572    18,18,18,18,18,18,18,18,
573    18,18,18,18,18,18,18,18,
574    18,18,18,18,18,18,18,18,
575    18,18,18,18,18,18,18,0,
576    18,18,18,18,18,18,18,18
577    };
578    
579    
580    
581    
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Fallback used only when the build says the C library has no strerror()
(HAVE_STRERROR is not defined; SunOS4 is the example that prompted this). It
looks the message up in the traditional sys_errlist[] table, which such
libraries are expected to export together with its size, sys_nerr.

Argument:
  n          the error number (normally an errno value)

Returns:     the text from sys_errlist[n] when n indexes the table,
             otherwise the fixed string "unknown error number"
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
601    
602    
603    /*************************************************
604    *         JIT memory callback                    *
605    *************************************************/
606    
607    static pcre_jit_stack* jit_callback(void *arg)
608    {
609    return (pcre_jit_stack *)arg;
610    }
611    
612    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* Widens a zero-terminated byte string into the shared buffer16, one 16-bit
unit per input byte, followed by a zero unit. buffer16 is discarded and
re-allocated whenever it is too small; the previous contents are not kept.
The allocation is two bytes per unit (this presumes pcre_uint16 occupies two
bytes). UTF mode is not implemented: asking for it prints a message and exits.

Arguments:
  p          the zero-terminated string to convert
  utf        non-zero to request UTF conversion (unsupported - exits)

Returns:     the number of 16-bit units written, including the final zero
*/

static int
to16(unsigned char *p, int utf)
{
pcre_uint16 *out;
int units = (int)strlen((char *)p) + 1;   /* Input bytes plus terminator */
int needed = 2*units;                     /* Bytes required in buffer16 */

if (needed > buffer16_size)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

if (utf)
  {
  fprintf(stderr, "pcretest: no support yet for UTF-16\n");
  exit(1);
  }

for (out = buffer16; *p != 0; p++) *out++ = *p;
*out++ = 0;

return out - buffer16;
}
#endif
655    
656    
657    /*************************************************
658    *        Read or extend an input line            *
659    *************************************************/
660    
661    /* Input lines are read into buffer, but both patterns and data lines can be
662    continued over multiple input lines. In addition, if the buffer fills up, we
663    want to automatically expand it so as to be able to handle extremely large
664    lines that are needed for certain stress tests. When the input buffer is
665    expanded, the other two buffers must also be expanded likewise, and the
666    contents of pbuffer, which are a copy of the input for callouts, must be
667    preserved (for when expansion happens for a data line). This is not the most
668    optimal way of handling this, but hey, this is just a test program!
669    
670    Arguments:
671      f            the file to read
672      start        where in buffer to start (this *must* be within buffer)
673      prompt       for stdin or readline()
674    
675    Returns:       pointer to the start of new data
676                   could be a copy of start, or could be moved
677                   NULL if no data read and EOF reached
678    */
679    
680    static pcre_uint8 *
681    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
682    {
683    pcre_uint8 *here = start;
684    
685    for (;;)
686      {
687      int rlen = (int)(buffer_size - (here - buffer));
688    
689      if (rlen > 1000)
690        {
691        int dlen;
692    
693        /* If libreadline support is required, use readline() to read a line if the
694        input is a terminal. Note that readline() removes the trailing newline, so
695        we must put it back again, to be compatible with fgets(). */
696    
697    #ifdef SUPPORT_LIBREADLINE
698        if (isatty(fileno(f)))
699          {
700          size_t len;
701          char *s = readline(prompt);
702          if (s == NULL) return (here == start)? NULL : start;
703          len = strlen(s);
704          if (len > 0) add_history(s);
705          if (len > rlen - 1) len = rlen - 1;
706          memcpy(here, s, len);
707          here[len] = '\n';
708          here[len+1] = 0;
709          free(s);
710          }
711        else
712    #endif
713    
714        /* Read the next line by normal means, prompting if the file is stdin. */
715    
716          {
717          if (f == stdin) printf("%s", prompt);
718          if (fgets((char *)here, rlen,  f) == NULL)
719            return (here == start)? NULL : start;
720          }
721    
722        dlen = (int)strlen((char *)here);
723        if (dlen > 0 && here[dlen - 1] == '\n') return start;
724        here += dlen;
725        }
726    
727      else
728        {
729        int new_buffer_size = 2*buffer_size;
730        pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
731        pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
732        pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
733    
734        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
735          {
736          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
737          exit(1);
738          }
739    
740        memcpy(new_buffer, buffer, buffer_size);
741        memcpy(new_pbuffer, pbuffer, buffer_size);
742    
743        buffer_size = new_buffer_size;
744    
745        start = new_buffer + (start - buffer);
746        here = new_buffer + (here - buffer);
747    
748        free(buffer);
749        free(dbuffer);
750        free(pbuffer);
751    
752        buffer = new_buffer;
753        dbuffer = new_dbuffer;
754        pbuffer = new_pbuffer;
755        }
756      }
757    
758    return NULL;  /* Control never gets here */
759    }
760    
761    
762    
763    
764    
765    
766    
767    /*************************************************
768    *          Read number from string               *
769    *************************************************/
770    
771    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
772    around with conditional compilation, just do the job by hand. It is only used
773    for unpicking arguments, so just keep it simple.
774    
775    Arguments:
776      str           string to be converted
777      endptr        where to put the end pointer
778    
779    Returns:        the converted value, as an int
780    */
781    
/* Convert a run of decimal digits, after optional leading white space, into
an int. No sign is accepted and no overflow check is made, so a digit string
too long for an int is not detected.

Arguments:
  str           string to be converted
  endptr        receives a pointer to the first character not consumed

Returns:        the accumulated value (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int total = 0;
unsigned char *scan = str;

/* Step over leading white space, stopping at the terminating zero. */

for (; *scan != 0 && isspace(*scan); scan++) ;

/* Accumulate the digits, most significant first. */

for (; isdigit(*scan); scan++)
  total = total * 10 + (int)(*scan - '0');

*endptr = scan;
return total;
}
791    
792    
793    
794    
795  /*************************************************  /*************************************************
796  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
797  *************************************************/  *************************************************/
# Line 92  return i + 1; Line 800  return i + 1;
800  and returns the value of the character.  and returns the value of the character.
801    
802  Argument:  Argument:
803    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
804    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
805    
806  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
807             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
808  */  */
809    
810  int  #if !defined NOUTF8
811  utf82ord(unsigned char *buffer, int *vptr)  
812    static int
813    utf82ord(unsigned char *utf8bytes, int *vptr)
814  {  {
815  int c = *buffer++;  int c = *utf8bytes++;
816  int d = c;  int d = c;
817  int i, j, s;  int i, j, s;
818    
# Line 117  if (i == 0 || i == 6) return 0; / Line 827  if (i == 0 || i == 6) return 0; /
827    
828  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
829    
830  d = c & utf8_table3[i];  s = 6*i;
831  s = 6 - i;  d = (c & utf8_table3[i]) << s;
832    
833  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
834    {    {
835    c = *buffer++;    c = *utf8bytes++;
836    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
837      s -= 6;
838    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
839    }    }
840    
841  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
842    
843  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
844    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
845  if (j != i) return -(i+1);  if (j != i) return -(i+1);
846    
# Line 140  if (j != i) return -(i+1); Line 850  if (j != i) return -(i+1);
850  return i+1;  return i+1;
851  }  }
852    
853    #endif
854    
855    
856    
857    /*************************************************
858    *       Convert character value to UTF-8         *
859    *************************************************/
860    
861    /* This function takes an integer value in the range 0 - 0x7fffffff
862    and encodes it as a UTF-8 character in 1 to 6 bytes.
863    
864  /* Debugging function to print the internal form of the regex. This is the same  Arguments:
865  code as contained in pcre.c under the DEBUG macro. */    cvalue     the character value
866      utf8bytes  pointer to buffer for result - at least 6 bytes long
867    
868  static const char *OP_names[] = {  Returns:     number of characters placed in the buffer
869    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  */
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
870    
871    #if !defined NOUTF8
872    
873  static void print_internals(pcre *re)  static int
874    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
875  {  {
876  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
877    for (i = 0; i < utf8_table1_size; i++)
878      if (cvalue <= utf8_table1[i]) break;
879    utf8bytes += i;
880    for (j = i; j > 0; j--)
881     {
882     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
883     cvalue >>= 6;
884     }
885    *utf8bytes = utf8_table2[i] | cvalue;
886    return i + 1;
887    }
888    
889  fprintf(outfile, "------------------------------------------------------------------\n");  #endif
890    
891  for(;;)  
892    
893    /*************************************************
894    *             Print character string             *
895    *************************************************/
896    
897    /* Character string printing function. Must handle UTF-8 strings in utf8
898    mode. Yields number of characters printed. If handed a NULL file, just counts
899    chars without printing. */
900    
901    static int pchars(unsigned char *p, int length, FILE *f)
902    {
903    int c = 0;
904    int yield = 0;
905    
906    while (length-- > 0)
907    {    {
908    int c;  #if !defined NOUTF8
909    int charlength;    if (use_utf8)
910        {
911        int rc = utf82ord(p, &c);
912    
913        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
914          {
915          length -= rc - 1;
916          p += rc;
917          if (PRINTHEX(c))
918            {
919            if (f != NULL) fprintf(f, "%c", c);
920            yield++;
921            }
922          else
923            {
924            int n = 4;
925            if (f != NULL) fprintf(f, "\\x{%02x}", c);
926            yield += (n <= 0x000000ff)? 2 :
927                     (n <= 0x00000fff)? 3 :
928                     (n <= 0x0000ffff)? 4 :
929                     (n <= 0x000fffff)? 5 : 6;
930            }
931          continue;
932          }
933        }
934    #endif
935    
936       /* Not UTF-8, or malformed UTF-8  */
937    
938      c = *p++;
939      if (PRINTHEX(c))
940        {
941        if (f != NULL) fprintf(f, "%c", c);
942        yield++;
943        }
944      else
945        {
946        if (f != NULL) fprintf(f, "\\x%02x", c);
947        yield += 4;
948        }
949      }
950    
951    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  return yield;
952    }
953    
954    if (*code >= OP_BRA)  
955    
956    /*************************************************
957    *              Callout function                  *
958    *************************************************/
959    
960    /* Called from PCRE as a result of the (?C) item. We print out where we are in
961    the match. Yield zero unless more callouts than the fail count, or the callout
962    data is not zero. */
963    
964    static int callout(pcre_callout_block *cb)
965    {
966    FILE *f = (first_callout | callout_extra)? outfile : NULL;
967    int i, pre_start, post_start, subject_length;
968    
969    if (callout_extra)
970      {
971      fprintf(f, "Callout %d: last capture = %d\n",
972        cb->callout_number, cb->capture_last);
973    
974      for (i = 0; i < cb->capture_top * 2; i += 2)
975      {      {
976      if (*code - OP_BRA > EXTRACT_BASIC_MAX)      if (cb->offset_vector[i] < 0)
977        fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);        fprintf(f, "%2d: <unset>\n", i/2);
978      else      else
979        fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);        {
980      code += 2;        fprintf(f, "%2d: ", i/2);
981          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
982            cb->offset_vector[i+1] - cb->offset_vector[i], f);
983          fprintf(f, "\n");
984          }
985      }      }
986      }
987    
988    /* Re-print the subject in canonical form, the first time or if giving full
989    datails. On subsequent calls in the same match, we use pchars just to find the
990    printed lengths of the substrings. */
991    
992    if (f != NULL) fprintf(f, "--->");
993    
994    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
995    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
996      cb->current_position - cb->start_match, f);
997    
998    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
999    
1000    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
1001      cb->subject_length - cb->current_position, f);
1002    
1003    if (f != NULL) fprintf(f, "\n");
1004    
1005    /* Always print appropriate indicators, with callout number if not already
1006    shown. For automatic callouts, show the pattern offset. */
1007    
1008    if (cb->callout_number == 255)
1009      {
1010      fprintf(outfile, "%+3d ", cb->pattern_position);
1011      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
1012      }
1013    else
1014      {
1015      if (callout_extra) fprintf(outfile, "    ");
1016        else fprintf(outfile, "%3d ", cb->callout_number);
1017      }
1018    
1019    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1020    fprintf(outfile, "^");
1021    
1022    if (post_start > 0)
1023      {
1024      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1025      fprintf(outfile, "^");
1026      }
1027    
1028    else switch(*code)  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1029      fprintf(outfile, " ");
1030    
1031    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1032      pbuffer + cb->pattern_position);
1033    
1034    fprintf(outfile, "\n");
1035    first_callout = 0;
1036    
1037    if (cb->mark != last_callout_mark)
1038      {
1039      fprintf(outfile, "Latest Mark: %s\n",
1040        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1041      last_callout_mark = cb->mark;
1042      }
1043    
1044    if (cb->callout_data != NULL)
1045      {
1046      int callout_data = *((int *)(cb->callout_data));
1047      if (callout_data != 0)
1048      {      {
1049      case OP_END:      fprintf(outfile, "Callout data = %d\n", callout_data);
1050      fprintf(outfile, "    %s\n", OP_names[*code]);      return callout_data;
1051      fprintf(outfile, "------------------------------------------------------------------\n");      }
1052      return;    }
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
1053    
1054        CLASS_REF_REPEAT:  return (cb->callout_number != callout_fail_id)? 0 :
1055           (++callout_count >= callout_fail_count)? 1 : 0;
1056    }
1057    
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
1058    
1059          case OP_CRRANGE:  /*************************************************
1060          case OP_CRMINRANGE:  *            Local malloc functions              *
1061          min = (code[1] << 8) + code[2];  *************************************************/
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
1062    
1063          default:  /* Alternative malloc function, to test functionality and save the size of a
1064          code--;  compiled re, which is the first store request that pcre_compile() makes. The
1065          }  show_malloc variable is set only during matching. */
1066        }  
1067      break;  static void *new_malloc(size_t size)
1068    {
1069    void *block = malloc(size);
1070    gotten_store = size;
1071    if (first_gotten_store == 0) first_gotten_store = size;
1072    if (show_malloc)
1073      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1074    return block;
1075    }
1076    
1077    static void new_free(void *block)
1078    {
1079    if (show_malloc)
1080      fprintf(outfile, "free             %p\n", block);
1081    free(block);
1082    }
1083    
1084    /* For recursion malloc/free, to test stacking calls */
1085    
1086    static void *stack_malloc(size_t size)
1087    {
1088    void *block = malloc(size);
1089    if (show_malloc)
1090      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1091    return block;
1092    }
1093    
1094    static void stack_free(void *block)
1095    {
1096    if (show_malloc)
1097      fprintf(outfile, "stack_free       %p\n", block);
1098    free(block);
1099    }
1100    
1101    
1102    /*************************************************
1103    *          Call pcre_fullinfo()                  *
1104    *************************************************/
1105    
1106    /* Get one piece of information from the pcre_fullinfo() function */
1107    
1108    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1109    {
1110    int rc;
1111    if ((rc = (fullinfo)(re, study, option, ptr)) < 0)
1112      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1113    }
1114    
     /* Anything else is just a one-node item */  
1115    
     default:  
     fprintf(outfile, "    %s", OP_names[*code]);  
     break;  
     }  
1116    
1117    code++;  /*************************************************
1118    fprintf(outfile, "\n");  *         Byte flipping function                 *
1119    }  *************************************************/
1120    
/* Reverse the byte order of a 2-byte or 4-byte quantity held in the low-order
bytes of an unsigned long. Any higher-order bytes of the argument are ignored
and are zero in the result.

Arguments:
  value     the value to be flipped
  n         2 to flip the low two bytes; anything else flips the low four

Returns:    the byte-reversed value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* Least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2 = (value >> 16) & 0xff;
unsigned long int b3 = (value >> 24) & 0xff;

if (n == 2) return (b0 << 8) | b1;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1130    
1131    
1132    
 /* Character string printing function. A "normal" and a UTF-8 version. */  
1133    
1134  static void pchars(unsigned char *p, int length, int utf8)  /*************************************************
1135    *        Check match or recursion limit          *
1136    *************************************************/
1137    
1138    static int
1139    check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1140      int start_offset, int options, int *use_offsets, int use_size_offsets,
1141      int flag, unsigned long int *limit, int errnumber, const char *msg)
1142  {  {
1143  int c;  int count;
1144  while (length-- > 0)  int min = 0;
1145    int mid = 64;
1146    int max = -1;
1147    
1148    extra->flags |= flag;
1149    
1150    for (;;)
1151    {    {
1152    if (utf8)    *limit = mid;
1153    
1154      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1155        use_offsets, use_size_offsets);
1156    
1157      if (count == errnumber)
1158      {      {
1159      int rc = utf82ord(p, &c);      /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1160      if (rc > 0)      min = mid;
1161        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1162        }
1163    
1164      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1165                             count == PCRE_ERROR_PARTIAL)
1166        {
1167        if (mid == min + 1)
1168        {        {
1169        length -= rc - 1;        fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1170        p += rc;        break;
       if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x{%02x}", c);  
       continue;  
1171        }        }
1172        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1173        max = mid;
1174        mid = (min + mid)/2;
1175      }      }
1176      else break;    /* Some other error */
1177      }
1178    
1179     /* Not UTF-8, or malformed UTF-8  */  extra->flags &= ~flag;
1180    return count;
1181    }
1182    
1183    
1184    
1185    /*************************************************
1186    *         Case-independent strncmp() function    *
1187    *************************************************/
1188    
1189    /*
1190    Arguments:
1191      s         first string
1192      t         second string
1193      n         number of characters to compare
1194    
1195    Returns:    < 0, = 0, or > 0, according to the comparison
1196    */
1197    
/* Compare the first n characters of two strings, ignoring case. Exactly n
characters are compared unless a difference is found first; a zero byte does
not end the comparison early. */

static int
strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
{
for (; n != 0; n--, s++, t++)
  {
  int diff = tolower(*s) - tolower(*t);
  if (diff != 0) return diff;
  }
return 0;
}
1208    
1209    
1210    
1211  /* Alternative malloc function, to test functionality and show the size of the  /*************************************************
1212  compiled re. */  *         Check newline indicator                *
1213    *************************************************/
1214    
1215    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1216    a message and return 0 if there is no match.
1217    
1218  static void *new_malloc(size_t size)  Arguments:
1219      p           points after the leading '<'
1220      f           file for error message
1221    
1222    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1223    */
1224    
1225    static int
1226    check_newline(pcre_uint8 *p, FILE *f)
1227  {  {
1228  gotten_store = size;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1229  if (log_store)  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1230    fprintf(outfile, "Memory allocation (code space): %d\n",  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1231      (int)((int)size - offsetof(real_pcre, code[0])));  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1232  return malloc(size);  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1233    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1234    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1235    fprintf(f, "Unknown newline type at: <%s\n", p);
1236    return 0;
1237  }  }
1238    
1239    
1240    
1241    /*************************************************
1242    *             Usage function                     *
1243    *************************************************/
1244    
/* Write the command line summary to stdout. Which option lines appear depends
on SUPPORT_LIBREADLINE, SUPPORT_PCRE16, NODFA and NOPOSIX. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs("  -16      use 16-bit interface\n", stdout);
#endif
fputs("  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       force each pattern to be studied at basic level\n"
      "  -s+      force each pattern to be studied, using JIT if available\n",
      stdout);
fputs("  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
1282    
1283    
1284    
1285    /*************************************************
1286    *                Main Program                    *
1287    *************************************************/
1288    
1289  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
1290  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 429  int main(int argc, char **argv) Line 1295  int main(int argc, char **argv)
1295  FILE *infile = stdin;  FILE *infile = stdin;
1296  int options = 0;  int options = 0;
1297  int study_options = 0;  int study_options = 0;
1298    int default_find_match_limit = FALSE;
1299  int op = 1;  int op = 1;
1300  int timeit = 0;  int timeit = 0;
1301    int timeitm = 0;
1302  int showinfo = 0;  int showinfo = 0;
1303  int showstore = 0;  int showstore = 0;
1304    int force_study = -1;
1305    int force_study_options = 0;
1306    int quiet = 0;
1307  int size_offsets = 45;  int size_offsets = 45;
1308  int size_offsets_max;  int size_offsets_max;
1309  int *offsets;  int *offsets = NULL;
1310  #if !defined NOPOSIX  #if !defined NOPOSIX
1311  int posix = 0;  int posix = 0;
1312  #endif  #endif
1313  int debug = 0;  int debug = 0;
1314  int done = 0;  int done = 0;
1315  unsigned char buffer[30000];  int all_use_dfa = 0;
1316  unsigned char dbuffer[1024];  int use_pcre16 = 0;
1317    int yield = 0;
1318    int stack_size;
1319    
1320    pcre_jit_stack *jit_stack = NULL;
1321    
1322    /* These vectors store, end-to-end, a list of captured substring names. Assume
1323    that 1024 is plenty long enough for the few names we'll be testing. */
1324    
1325    pcre_uchar copynames[1024];
1326    pcre_uchar getnames[1024];
1327    
1328  /* Static so that new_malloc can use it. */  pcre_uchar *copynamesptr;
1329    pcre_uchar *getnamesptr;
1330    
1331    /* Get buffers from malloc() so that valgrind will check their misuse when
1332    debugging. They grow automatically when very long lines are read. The 16-bit
1333    buffer (buffer16) is obtained only if needed. */
1334    
1335    buffer = (pcre_uint8 *)malloc(buffer_size);
1336    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1337    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1338    
1339    /* The outfile variable is static so that new_malloc can use it. */
1340    
1341  outfile = stdout;  outfile = stdout;
1342    
1343    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1344    library to translate CRLF into a single LF character. At least, that's what
1345    I've been told: never having used Windows I take this all on trust. Originally
1346    it set 0x8000, but then I was advised that _O_BINARY was better. */
1347    
1348    #if defined(_WIN32) || defined(WIN32)
1349    _setmode( _fileno( stdout ), _O_BINARY );
1350    #endif
1351    
1352  /* Scan options */  /* Scan options */
1353    
1354  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1355    {    {
1356    char *endptr;    unsigned char *endptr;
1357    
1358    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1359      showstore = 1;    else if (strcmp(argv[op], "-m") == 0) showstore = 1;
1360    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1361      else if (strcmp(argv[op], "-s+") == 0)
1362        {
1363        force_study = 1;
1364        force_study_options = PCRE_STUDY_JIT_COMPILE;
1365        }
1366      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1367      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1368    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1369    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1370      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1371    #if !defined NODFA
1372      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1373    #endif
1374    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1375        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1376            *endptr == 0))
1377        {
1378        op++;
1379        argc--;
1380        }
1381      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1382        {
1383        int both = argv[op][2] == 0;
1384        int temp;
1385        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1386                         *endptr == 0))
1387          {
1388          timeitm = temp;
1389          op++;
1390          argc--;
1391          }
1392        else timeitm = LOOPREPEAT;
1393        if (both) timeit = timeitm;
1394        }
1395      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1396          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1397            *endptr == 0))
1398      {      {
1399    #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1400        printf("PCRE: -S not supported on this OS\n");
1401        exit(1);
1402    #else
1403        int rc;
1404        struct rlimit rlim;
1405        getrlimit(RLIMIT_STACK, &rlim);
1406        rlim.rlim_cur = stack_size * 1024 * 1024;
1407        rc = setrlimit(RLIMIT_STACK, &rlim);
1408        if (rc != 0)
1409          {
1410        printf("PCRE: setrlimit() failed with error %d\n", rc);
1411        exit(1);
1412          }
1413      op++;      op++;
1414      argc--;      argc--;
1415    #endif
1416      }      }
1417  #if !defined NOPOSIX  #if !defined NOPOSIX
1418    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1419  #endif  #endif
1420      else if (strcmp(argv[op], "-C") == 0)
1421        {
1422        int rc;
1423        unsigned long int lrc;
1424        printf("PCRE version %s\n", pcre_version());
1425        printf("Compiled with\n");
1426    
1427    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. */
1428    
1429    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1430        printf("  8-bit and 16-bit support\n");
1431        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1432        printf("  %sUTF-8 support\n", rc? "" : "No ");
1433        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1434        printf("  %sUTF-16 support\n", rc? "" : "No ");
1435    #elif defined SUPPORT_PCRE8
1436        printf("  8-bit support only\n");
1437        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1438        printf("  %sUTF-8 support\n", rc? "" : "No ");
1439    #else
1440        printf("  16-bit support only\n");
1441        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1442        printf("  %sUTF-16 support\n", rc? "" : "No ");
1443    #endif
1444    
1445        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1446        printf("  %sUnicode properties support\n", rc? "" : "No ");
1447        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1448        if (rc)
1449          printf("  Just-in-time compiler support\n");
1450        else
1451          printf("  No just-in-time compiler support\n");
1452        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1453        /* Note that these values are always the ASCII values, even
1454        in EBCDIC environments. CR is 13 and NL is 10. */
1455        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1456          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1457          (rc == -2)? "ANYCRLF" :
1458          (rc == -1)? "ANY" : "???");
1459        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1460        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1461                                         "all Unicode newlines");
1462        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1463        printf("  Internal link size = %d\n", rc);
1464        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1465        printf("  POSIX malloc threshold = %d\n", rc);
1466        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1467        printf("  Default match limit = %ld\n", lrc);
1468        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1469        printf("  Default recursion depth limit = %ld\n", lrc);
1470        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1471        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1472        goto EXIT;
1473        }
1474      else if (strcmp(argv[op], "-help") == 0 ||
1475               strcmp(argv[op], "--help") == 0)
1476        {
1477        usage();
1478        goto EXIT;
1479        }
1480    else    else
1481      {      {
1482      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1483      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1484      printf("  -d     debug: show compiled code; implies -i\n"      yield = 1;
1485             "  -i     show information about compiled pattern\n"      goto EXIT;
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
1486      }      }
1487    op++;    op++;
1488    argc--;    argc--;
1489    }    }
1490    
1491    /* Select which fullinfo function to use. */
1492    
1493    fullinfo = use_pcre16? pcre16_fullinfo : pcre_fullinfo;
1494    
1495  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
1496    
1497  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
1498  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
1499  if (offsets == NULL)  if (offsets == NULL)
1500    {    {
1501    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1502      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1503    return 1;    yield = 1;
1504      goto EXIT;
1505    }    }
1506    
1507  /* Sort out the input and output files */  /* Sort out the input and output files */
1508    
1509  if (argc > 1)  if (argc > 1)
1510    {    {
1511    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1512    if (infile == NULL)    if (infile == NULL)
1513      {      {
1514      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1515      return 1;      yield = 1;
1516        goto EXIT;
1517      }      }
1518    }    }
1519    
1520  if (argc > 2)  if (argc > 2)
1521    {    {
1522    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1523    if (outfile == NULL)    if (outfile == NULL)
1524      {      {
1525      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1526      return 1;      yield = 1;
1527        goto EXIT;
1528      }      }
1529    }    }
1530    
1531  /* Set alternative malloc function */  /* Set alternative malloc function */
1532    
1533    #ifdef SUPPORT_PCRE8
1534  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1535    pcre_free = new_free;
1536    pcre_stack_malloc = stack_malloc;
1537    pcre_stack_free = stack_free;
1538    #endif
1539    
1540    #ifdef SUPPORT_PCRE16
1541    pcre16_malloc = new_malloc;
1542    pcre16_free = new_free;
1543    pcre16_stack_malloc = stack_malloc;
1544    pcre16_stack_free = stack_free;
1545    #endif
1546    
1547  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1548    
1549  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1550    
1551  /* Main loop */  /* Main loop */
1552    
# Line 540  while (!done) Line 1561  while (!done)
1561  #endif  #endif
1562    
1563    const char *error;    const char *error;
1564      unsigned char *markptr;
1565    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1566      unsigned char *to_file = NULL;
1567    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1568      unsigned long int true_size, true_study_size = 0;
1569      size_t size, regex_gotten_store;
1570      int do_allcaps = 0;
1571      int do_mark = 0;
1572    int do_study = 0;    int do_study = 0;
1573      int no_force_study = 0;
1574    int do_debug = debug;    int do_debug = debug;
1575    int do_G = 0;    int do_G = 0;
1576    int do_g = 0;    int do_g = 0;
1577    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1578    int do_showrest = 0;    int do_showrest = 0;
1579    int utf8 = 0;    int do_showcaprest = 0;
1580    int erroroffset, len, delimiter;    int do_flip = 0;
1581      int erroroffset, len, delimiter, poffset;
1582    
1583    if (infile == stdin) printf("  re> ");    use_utf8 = 0;
1584    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    debug_lengths = 1;
1585    
1586      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
1587    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1588      fflush(outfile);
1589    
1590    p = buffer;    p = buffer;
1591    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1592    if (*p == 0) continue;    if (*p == 0) continue;
1593    
1594    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1595    complete, read more. */  
1596      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1597        {
1598        unsigned long int magic, get_options;
1599        pcre_uint8 sbuf[8];
1600        FILE *f;
1601    
1602        p++;
1603        pp = p + (int)strlen((char *)p);
1604        while (isspace(pp[-1])) pp--;
1605        *pp = 0;
1606    
1607        f = fopen((char *)p, "rb");
1608        if (f == NULL)
1609          {
1610          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1611          continue;
1612          }
1613    
1614        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1615    
1616        true_size =
1617          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1618        true_study_size =
1619          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1620    
1621        re = (real_pcre *)new_malloc(true_size);
1622        regex_gotten_store = first_gotten_store;
1623    
1624        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1625    
1626        magic = ((real_pcre *)re)->magic_number;
1627        if (magic != MAGIC_NUMBER)
1628          {
1629          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1630            {
1631            do_flip = 1;
1632            }
1633          else
1634            {
1635            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1636            fclose(f);
1637            continue;
1638            }
1639          }
1640    
1641        fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1642          do_flip? " (byte-inverted)" : "", p);
1643    
1644        /* Need to know if UTF-8 for printing data strings */
1645    
1646        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1647        use_utf8 = (get_options & PCRE_UTF8) != 0;
1648    
1649        /* Now see if there is any following study data. */
1650    
1651        if (true_study_size != 0)
1652          {
1653          pcre_study_data *psd;
1654    
1655          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1656          extra->flags = PCRE_EXTRA_STUDY_DATA;
1657    
1658          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1659          extra->study_data = psd;
1660    
1661          if (fread(psd, 1, true_study_size, f) != true_study_size)
1662            {
1663            FAIL_READ:
1664            fprintf(outfile, "Failed to read data from %s\n", p);
1665            if (extra != NULL) pcre_free_study(extra);
1666            if (re != NULL) new_free(re);
1667            fclose(f);
1668            continue;
1669            }
1670          fprintf(outfile, "Study data loaded from %s\n", p);
1671          do_study = 1;     /* To get the data output if requested */
1672          }
1673        else fprintf(outfile, "No study data\n");
1674    
1675        fclose(f);
1676        goto SHOW_INFO;
1677        }
1678    
1679      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1680      the pattern; if it isn't complete, read more. */
1681    
1682    delimiter = *p++;    delimiter = *p++;
1683    
1684    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1685      {      {
1686      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1687      goto SKIP_DATA;      goto SKIP_DATA;
1688      }      }
1689    
1690    pp = p;    pp = p;
1691      poffset = (int)(p - buffer);
1692    
1693    for(;;)    for(;;)
1694      {      {
# Line 581  while (!done) Line 1699  while (!done)
1699        pp++;        pp++;
1700        }        }
1701      if (*pp != 0) break;      if (*pp != 0) break;
1702        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1703        {        {
1704        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1705        done = 1;        done = 1;
# Line 599  while (!done) Line 1708  while (!done)
1708      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1709      }      }
1710    
1711      /* The buffer may have moved while being extended; reset the start of data
1712      pointer to the correct relative point in the buffer. */
1713    
1714      p = buffer + poffset;
1715    
1716    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1717    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1718    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1719    
1720    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1721    
1722    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1723      for callouts. */
1724    
1725    *pp++ = 0;    *pp++ = 0;
1726      strcpy((char *)pbuffer, (char *)p);
1727    
1728    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1729    
# Line 619  while (!done) Line 1735  while (!done)
1735      {      {
1736      switch (*pp++)      switch (*pp++)
1737        {        {
1738          case 'f': options |= PCRE_FIRSTLINE; break;
1739        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1740        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1741        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1742        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1743        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1744    
1745        case '+': do_showrest = 1; break;        case '+':
1746          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1747          break;
1748    
1749          case '=': do_allcaps = 1; break;
1750        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1751          case 'B': do_debug = 1; break;
1752          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1753        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1754        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1755          case 'F': do_flip = 1; break;
1756        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1757        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1758          case 'J': options |= PCRE_DUPNAMES; break;
1759          case 'K': do_mark = 1; break;
1760        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1761          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1762    
1763  #if !defined NOPOSIX  #if !defined NOPOSIX
1764        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1765  #endif  #endif
1766    
1767        case 'S': do_study = 1; break;        case 'S':
1768          if (do_study == 0)
1769            {
1770            do_study = 1;
1771            if (*pp == '+')
1772              {
1773              study_options |= PCRE_STUDY_JIT_COMPILE;
1774              pp++;
1775              }
1776            }
1777          else
1778            {
1779            do_study = 0;
1780            no_force_study = 1;
1781            }
1782          break;
1783    
1784        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1785          case 'W': options |= PCRE_UCP; break;
1786        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1787        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1788          case 'Z': debug_lengths = 0; break;
1789          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1790          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1791    
1792          case 'T':
1793          switch (*pp++)
1794            {
1795            case '0': tables = tables0; break;
1796            case '1': tables = tables1; break;
1797    
1798            case '\r':
1799            case '\n':
1800            case ' ':
1801            case 0:
1802            fprintf(outfile, "** Missing table number after /T\n");
1803            goto SKIP_DATA;
1804    
1805            default:
1806            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1807            goto SKIP_DATA;
1808            }
1809          break;
1810    
1811        case 'L':        case 'L':
1812        ppp = pp;        ppp = pp;
1813        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1814          /* The zero (NUL) test is just in case this is an unterminated line. */
1815          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1816        *ppp = 0;        *ppp = 0;
1817        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1818          {          {
1819          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1820          goto SKIP_DATA;          goto SKIP_DATA;
1821          }          }
1822          locale_set = 1;
1823        tables = pcre_maketables();        tables = pcre_maketables();
1824        pp = ppp;        pp = ppp;
1825        break;        break;
1826    
1827        case '\n': case ' ': break;        case '>':
1828          to_file = pp;
1829          while (*pp != 0) pp++;
1830          while (isspace(pp[-1])) pp--;
1831          *pp = 0;
1832          break;
1833    
1834          case '<':
1835            {
1836            if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1837              {
1838              options |= PCRE_JAVASCRIPT_COMPAT;
1839              pp += 3;
1840              }
1841            else
1842              {
1843              int x = check_newline(pp, outfile);
1844              if (x == 0) goto SKIP_DATA;
1845              options |= x;
1846              while (*pp++ != '>');
1847              }
1848            }
1849          break;
1850    
1851          case '\r':                      /* So that it works in Windows */
1852          case '\n':
1853          case ' ':
1854          break;
1855    
1856        default:        default:
1857        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1858        goto SKIP_DATA;        goto SKIP_DATA;
# Line 664  while (!done) Line 1861  while (!done)
1861    
1862    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1863    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1864    local character tables. */    local character tables. Neither does it have 16-bit support. */
1865    
1866  #if !defined NOPOSIX  #if !defined NOPOSIX
1867    if (posix || do_posix)    if (posix || do_posix)
1868      {      {
1869      int rc;      int rc;
1870      int cflags = 0;      int cflags = 0;
1871    
1872      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1873      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1874        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1875        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1876        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1877        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1878        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1879    
1880        first_gotten_store = 0;
1881      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1882    
1883      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 680  while (!done) Line 1885  while (!done)
1885    
1886      if (rc != 0)      if (rc != 0)
1887        {        {
1888        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1889        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1890        goto SKIP_DATA;        goto SKIP_DATA;
1891        }        }
# Line 692  while (!done) Line 1897  while (!done)
1897  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1898    
1899      {      {
1900      if (timeit)      unsigned long int get_options;
1901    
1902        /* In 16-bit mode, convert the input. The space needed for a non-UTF string
1903        is exactly double the 8-bit size. For a UTF-8 string, the size needed for
1904        UTF-16 is no more than double, because a code point up to 0xffff uses 1 to
1905        3 bytes in UTF-8 and exactly 2 bytes in UTF-16. Higher values use 4 bytes
1906        in UTF-8 and 4 bytes (a surrogate pair) in UTF-16. */
1907    
1908    #ifdef SUPPORT_PCRE16
1909        if (use_pcre16) (void)to16(p, options & PCRE_UTF8);
1910    #endif
1911    
1912        /* Compile many times when timing */
1913    
1914        if (timeit > 0)
1915        {        {
1916        register int i;        register int i;
1917        clock_t time_taken;        clock_t time_taken;
1918        clock_t start_time = clock();        clock_t start_time = clock();
1919        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1920          {          {
1921          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  #ifdef SUPPORT_PCRE16
1922            if (use_pcre16)
1923              re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1924            else
1925    #endif
1926              re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1927          if (re != NULL) free(re);          if (re != NULL) free(re);
1928          }          }
1929        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1930        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1931          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1932          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1933        }        }
1934    
1935      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
1936    
1937    #ifdef SUPPORT_PCRE16
1938        if (use_pcre16)
1939          re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);
1940        else
1941    #endif
1942          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1943    
1944      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1945      if non-interactive. */      if non-interactive. */
# Line 721  while (!done) Line 1952  while (!done)
1952          {          {
1953          for (;;)          for (;;)
1954            {            {
1955            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1956              {              {
1957              done = 1;              done = 1;
1958              goto CONTINUE;              goto CONTINUE;
# Line 735  while (!done) Line 1966  while (!done)
1966        goto CONTINUE;        goto CONTINUE;
1967        }        }
1968    
1969      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1970      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1971      returns only limited data. Check that it agrees with the newer one. */      lines. */
1972    
1973        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1974        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1975    
1976        /* Extract the size for possible writing before possibly flipping it,
1977        and remember the store that was got. */
1978    
1979        true_size = ((real_pcre *)re)->size;
1980        regex_gotten_store = first_gotten_store;
1981    
1982        /* Output code size information if requested */
1983    
1984        if (log_store)
1985          fprintf(outfile, "Memory allocation (code space): %d\n",
1986            (int)(first_gotten_store -
1987                  sizeof(real_pcre) -
1988                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1989    
1990        /* If -s or /S was present, study the regex to generate additional info to
1991        help with the matching, unless the pattern has the SS option, which
1992        suppresses the effect of /S (used for a few test patterns where studying is
1993        never sensible). */
1994    
1995        if (do_study || (force_study >= 0 && !no_force_study))
1996          {
1997          if (timeit > 0)
1998            {
1999            register int i;
2000            clock_t time_taken;
2001            clock_t start_time = clock();
2002            for (i = 0; i < timeit; i++)
2003              {
2004              if (use_pcre16)
2005                extra = pcre16_study(re, study_options | force_study_options, &error);
2006              else
2007                extra = pcre_study(re, study_options | force_study_options, &error);
2008              }
2009            time_taken = clock() - start_time;
2010            if (extra != NULL) pcre_free_study(extra);
2011            fprintf(outfile, "  Study time %.4f milliseconds\n",
2012              (((double)time_taken * 1000.0) / (double)timeit) /
2013                (double)CLOCKS_PER_SEC);
2014            }
2015          if (use_pcre16)
2016            extra = pcre16_study(re, study_options | force_study_options, &error);
2017          else
2018            extra = pcre_study(re, study_options | force_study_options, &error);
2019          if (error != NULL)
2020            fprintf(outfile, "Failed to study: %s\n", error);
2021          else if (extra != NULL)
2022            {
2023            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2024            if (log_store)
2025              {
2026              size_t jitsize;
2027              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2028              if (jitsize != 0)
2029                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2030              }
2031            }
2032          }
2033    
2034        /* If /K was present, we set up for handling MARK data. */
2035    
2036        if (do_mark)
2037          {
2038          if (extra == NULL)
2039            {
2040            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2041            extra->flags = 0;
2042            }
2043          extra->mark = &markptr;
2044          extra->flags |= PCRE_EXTRA_MARK;
2045          }
2046    
2047        /* If the 'F' option was present, we flip the bytes of all the integer
2048        fields in the regex data block and the study block. This is to make it
2049        possible to test PCRE's handling of byte-flipped patterns, e.g. those
2050        compiled on a different architecture. */
2051    
2052        if (do_flip)
2053          {
2054          real_pcre *rre = (real_pcre *)re;
2055          rre->magic_number =
2056            byteflip(rre->magic_number, sizeof(rre->magic_number));
2057          rre->size = byteflip(rre->size, sizeof(rre->size));
2058          rre->options = byteflip(rre->options, sizeof(rre->options));
2059          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2060          rre->top_bracket =
2061            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2062          rre->top_backref =
2063            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2064          rre->first_char =
2065            (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2066          rre->req_char =
2067            (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2068          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2069            sizeof(rre->name_table_offset));
2070          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2071            sizeof(rre->name_entry_size));
2072          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2073            sizeof(rre->name_count));
2074    
2075          if (extra != NULL)
2076            {
2077            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2078            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2079            rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2080            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2081            }
2082          }
2083    
2084        /* Extract and display information from the compiled data if required. */
2085    
2086        SHOW_INFO:
2087    
2088        if (do_debug)
2089          {
2090          fprintf(outfile, "------------------------------------------------------------------\n");
2091          if (use_pcre16)
2092            pcre16_printint(re, outfile, debug_lengths);
2093          else
2094            pcre_printint(re, outfile, debug_lengths);
2095          }
2096    
2097        /* We already have the options in get_options (see above) */
2098    
2099      if (do_showinfo)      if (do_showinfo)
2100        {        {
2101        unsigned long int get_options;        unsigned long int all_options;
2102    #if !defined NOINFOCHECK
2103        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
2104        int count, backrefmax, first_char, need_char;  #endif
2105        size_t size;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
2106            hascrorlf;
2107        if (do_debug) print_internals(re);        int nameentrysize, namecount;
2108          const pcre_uchar *nametable;
2109    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
2110        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2111        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2112        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2113        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2114        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2115          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2116          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2117          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2118          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2119          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2120          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2121    
2122          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2123          that it gives the same results as the new function. */
2124    
2125    #if !defined NOINFOCHECK
2126          if (!use_pcre16)
2127            {
2128            old_count = pcre_info(re, &old_options, &old_first_char);
2129            if (count < 0) fprintf(outfile,
2130              "Error %d from pcre_info()\n", count);
2131            else
2132              {
2133              if (old_count != count) fprintf(outfile,
2134                "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2135                  old_count);
2136    
2137              if (old_first_char != first_char) fprintf(outfile,
2138                "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2139                  first_char, old_first_char);
2140    
2141              if (old_options != (int)get_options) fprintf(outfile,
2142                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2143                  get_options, old_options);
2144              }
2145            }
2146    #endif
2147    
2148        old_count = pcre_info(re, &old_options, &old_first_char);        if (size != regex_gotten_store) fprintf(outfile,
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
   
       if (size != gotten_store) fprintf(outfile,  
2149          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2150          size, gotten_store);          (int)size, (int)regex_gotten_store);
2151    
2152        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
2153        if (backrefmax > 0)        if (backrefmax > 0)
2154          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
2155    
2156          if (namecount > 0)
2157            {
2158            fprintf(outfile, "Named capturing subpatterns:\n");
2159            while (namecount-- > 0)
2160              {
2161              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
2162                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2163                GET2(nametable, 0));
2164              nametable += nameentrysize;
2165              }
2166            }
2167    
2168          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2169          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2170    
2171          all_options = ((real_pcre *)re)->options;
2172          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2173    
2174        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2175          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2176            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2177            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2178            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2179            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2180              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2181            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2182              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2183              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2184            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2185            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2186            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2187            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2188              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2189              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2190              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2191              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2192              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2193    
2194          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2195    
2196          switch (get_options & PCRE_NEWLINE_BITS)
2197            {
2198            case PCRE_NEWLINE_CR:
2199            fprintf(outfile, "Forced newline sequence: CR\n");
2200            break;
2201    
2202            case PCRE_NEWLINE_LF:
2203            fprintf(outfile, "Forced newline sequence: LF\n");
2204            break;
2205    
2206        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_CRLF:
2207          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
2208            break;
2209    
2210            case PCRE_NEWLINE_ANYCRLF:
2211            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2212            break;
2213    
2214            case PCRE_NEWLINE_ANY:
2215            fprintf(outfile, "Forced newline sequence: ANY\n");
2216            break;
2217    
2218            default:
2219            break;
2220            }
2221    
2222        if (first_char == -1)        if (first_char == -1)
2223          {          {
2224          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
2225          }          }
2226        else if (first_char < 0)        else if (first_char < 0)
2227          {          {
# Line 805  while (!done) Line 2229  while (!done)
2229          }          }
2230        else        else
2231          {          {
2232          if (isprint(first_char))          const char *caseless =
2233            fprintf(outfile, "First char = \'%c\'\n", first_char);            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2234              "" : " (caseless)";
2235    
2236            if (PRINTHEX(first_char))
2237              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2238          else          else
2239            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2240          }          }
2241    
2242        if (need_char < 0)        if (need_char < 0)
# Line 817  while (!done) Line 2245  while (!done)
2245          }          }
2246        else        else
2247          {          {
2248          if (isprint(need_char))          const char *caseless =
2249            fprintf(outfile, "Need char = \'%c\'\n", need_char);            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2250              "" : " (caseless)";
2251    
2252            if (PRINTHEX(need_char))
2253              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2254          else          else
2255            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2256          }          }
       }  
2257    
2258      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
2259      help with the matching. */        value, but it varies, depending on the computer architecture, and
2260          so messes up the test suite. (And with the /F option, it might be
2261          flipped.) If study was forced by an external -s, don't show this
2262          information unless -i or -d was also present. This means that, except
2263          when auto-callouts are involved, the output from runs with and without
2264          -s should be identical. */
2265    
2266      if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
       {  
       if (timeit)  
2267          {          {
2268          register int i;          if (extra == NULL)
2269          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
2270          clock_t start_time = clock();          else
2271          for (i = 0; i < LOOPREPEAT; i++)            {
2272            extra = pcre_study(re, study_options, &error);            pcre_uint8 *start_bits = NULL;
2273          time_taken = clock() - start_time;            int minlength;
2274          if (extra != NULL) free(extra);  
2275          fprintf(outfile, "  Study time %.3f milliseconds\n",            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2276            ((double)time_taken * 1000.0)/            fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2277            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
2278              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2279              if (start_bits == NULL)
2280                fprintf(outfile, "No set of starting bytes\n");
2281              else
2282                {
2283                int i;
2284                int c = 24;
2285                fprintf(outfile, "Starting byte set: ");
2286                for (i = 0; i < 256; i++)
2287                  {
2288                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
2289                    {
2290                    if (c > 75)
2291                      {
2292                      fprintf(outfile, "\n  ");
2293                      c = 2;
2294                      }
2295                    if (PRINTHEX(i) && i != ' ')
2296                      {
2297                      fprintf(outfile, "%c ", i);
2298                      c += 2;
2299                      }
2300                    else
2301                      {
2302                      fprintf(outfile, "\\x%02x ", i);
2303                      c += 5;
2304                      }
2305                    }
2306                  }
2307                fprintf(outfile, "\n");
2308                }
2309              }
2310    
2311            /* Show this only if the JIT was set by /S, not by -s. */
2312    
2313            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2314              {
2315              int jit;
2316              new_info(re, extra, PCRE_INFO_JIT, &jit);
2317              if (jit)
2318                fprintf(outfile, "JIT study was successful\n");
2319              else
2320    #ifdef SUPPORT_JIT
2321                fprintf(outfile, "JIT study was not successful\n");
2322    #else
2323                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2324    #endif
2325              }
2326          }          }
2327          }
2328    
2329        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
2330        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
2331          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
2332    
2333        else if (do_showinfo)      if (to_file != NULL)
2334          {
2335          FILE *f = fopen((char *)to_file, "wb");
2336          if (f == NULL)
2337            {
2338            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2339            }
2340          else
2341          {          {
2342          uschar *start_bits = NULL;          pcre_uint8 sbuf[8];
2343          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2344          if (start_bits == NULL)          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2345            fprintf(outfile, "No starting character set\n");          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2346            sbuf[3] = (pcre_uint8)((true_size) & 255);
2347    
2348            sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2349            sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2350            sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2351            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2352    
2353            if (fwrite(sbuf, 1, 8, f) < 8 ||
2354                fwrite(re, 1, true_size, f) < true_size)
2355              {
2356              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2357              }
2358          else          else
2359            {            {
2360            int i;            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2361            int c = 24;  
2362            fprintf(outfile, "Starting character set: ");            /* If there is study data, write it. */
2363            for (i = 0; i < 256; i++)  
2364              if (extra != NULL)
2365              {              {
2366              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
2367                    true_study_size)
2368                {                {
2369                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
2370                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
2371                }                }
2372                else fprintf(outfile, "Study data written to %s\n", to_file);
2373              }              }
           fprintf(outfile, "\n");  
2374            }            }
2375            fclose(f);
2376          }          }
2377    
2378          new_free(re);
2379          if (extra != NULL) pcre_free_study(extra);
2380          if (locale_set)
2381            {
2382            new_free((void *)tables);
2383            setlocale(LC_CTYPE, "C");
2384            locale_set = 0;
2385            }
2386          continue;  /* With next regex */
2387        }        }
2388      }      }        /* End of non-POSIX compile */
2389    
2390    /* Read data lines and test them */    /* Read data lines and test them */
2391    
2392    for (;;)    for (;;)
2393      {      {
2394      unsigned char *q;      pcre_uint8 *q;
2395      unsigned char *bptr = dbuffer;      pcre_uint8 *bptr;
2396      int *use_offsets = offsets;      int *use_offsets = offsets;
2397      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2398        int callout_data = 0;
2399        int callout_data_set = 0;
2400      int count, c;      int count, c;
2401      int copystrings = 0;      int copystrings = 0;
2402        int find_match_limit = default_find_match_limit;
2403      int getstrings = 0;      int getstrings = 0;
2404      int getlist = 0;      int getlist = 0;
2405      int gmatched = 0;      int gmatched = 0;
2406      int start_offset = 0;      int start_offset = 0;
2407        int start_offset_sign = 1;
2408      int g_notempty = 0;      int g_notempty = 0;
2409        int use_dfa = 0;
2410    
2411      options = 0;      options = 0;
2412    
2413      if (infile == stdin) printf("data> ");      *copynames = 0;
2414      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
2415    
2416        copynamesptr = copynames;
2417        getnamesptr = getnames;
2418    
2419        pcre_callout = callout;
2420        first_callout = 1;
2421        last_callout_mark = NULL;
2422        callout_extra = 0;
2423        callout_count = 0;
2424        callout_fail_count = 999999;
2425        callout_fail_id = -1;
2426        show_malloc = 0;
2427    
2428        if (extra != NULL) extra->flags &=
2429          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2430    
2431        len = 0;
2432        for (;;)
2433        {        {
2434        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2435        goto CONTINUE;          {
2436            if (len > 0)    /* Reached EOF without hitting a newline */
2437              {
2438              fprintf(outfile, "\n");
2439              break;
2440              }
2441            done = 1;
2442            goto CONTINUE;
2443            }
2444          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2445          len = (int)strlen((char *)buffer);
2446          if (buffer[len-1] == '\n') break;
2447        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2448    
     len = (int)strlen((char *)buffer);  
2449      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2450      buffer[len] = 0;      buffer[len] = 0;
2451      if (len == 0) break;      if (len == 0) break;
# Line 921  while (!done) Line 2453  while (!done)
2453      p = buffer;      p = buffer;
2454      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2455    
2456      q = dbuffer;      bptr = q = dbuffer;
2457      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2458        {        {
2459        int i = 0;        int i = 0;
2460        int n = 0;        int n = 0;
2461    
2462        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
2463          {          {
2464          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 942  while (!done) Line 2475  while (!done)
2475          c -= '0';          c -= '0';
2476          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2477            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2478    
2479    #if !defined NOUTF8
2480            if (use_utf8 && c > 255)
2481              {
2482              unsigned char buff8[8];
2483              int ii, utn;
2484              utn = ord2utf8(c, buff8);
2485              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2486              c = buff8[ii];   /* Last byte */
2487              }
2488    #endif
2489          break;          break;
2490    
2491          case 'x':          case 'x':
2492    
2493          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
2494    
2495    #if !defined NOUTF8
2496          if (*p == '{')          if (*p == '{')
2497            {            {
2498            unsigned char *pt = p;            unsigned char *pt = p;
2499            c = 0;            c = 0;
2500            while (isxdigit(*(++pt)))  
2501              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2502              when isxdigit() is a macro that refers to its argument more than
2503              once. This is banned by the C Standard, but apparently happens in at
2504              least one MacOS environment. */
2505    
2506              for (pt++; isxdigit(*pt); pt++)
2507                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2508            if (*pt == '}')            if (*pt == '}')
2509              {              {
2510              unsigned char buffer[8];              unsigned char buff8[8];
2511              int ii, utn;              int ii, utn;
2512              utn = ord2utf8(c, buffer);              if (use_utf8)
2513              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];                {
2514              c = buffer[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2515                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2516                  c = buff8[ii];   /* Last byte */
2517                  }
2518                else
2519                 {
2520                 if (c > 255)
2521                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2522                     "UTF-8 mode is not enabled.\n"
2523                     "** Truncation will probably give the wrong result.\n", c);
2524                 }
2525              p = pt + 1;              p = pt + 1;
2526              break;              break;
2527              }              }
2528            /* Not correct form; fall through */            /* Not correct form; fall through */
2529            }            }
2530    #endif
2531    
2532          /* Ordinary \x */          /* Ordinary \x */
2533    
2534          c = 0;          c = 0;
2535          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2536            {            {
2537            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2538            p++;            p++;
2539            }            }
2540          break;          break;
2541    
2542          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2543          p--;          p--;
2544          continue;          continue;
2545    
2546            case '>':
2547            if (*p == '-')
2548              {
2549              start_offset_sign = -1;
2550              p++;
2551              }
2552            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2553            start_offset *= start_offset_sign;
2554            continue;
2555    
2556          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2557          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2558          continue;          continue;
# Line 990  while (!done) Line 2562  while (!done)
2562          continue;          continue;
2563    
2564          case 'C':          case 'C':
2565          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
2566          copystrings |= 1 << n;            {
2567              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2568              copystrings |= 1 << n;
2569              }
2570            else if (isalnum(*p))
2571              {
2572              pcre_uchar *npp = copynamesptr;
2573              while (isalnum(*p)) *npp++ = *p++;
2574              *npp++ = 0;
2575              *npp = 0;
2576              n = pcre_get_stringnumber(re, (char *)copynamesptr);
2577              if (n < 0)
2578                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2579              copynamesptr = npp;
2580              }
2581            else if (*p == '+')
2582              {
2583              callout_extra = 1;
2584              p++;
2585              }
2586            else if (*p == '-')
2587              {
2588              pcre_callout = NULL;
2589              p++;
2590              }
2591            else if (*p == '!')
2592              {
2593              callout_fail_id = 0;
2594              p++;
2595              while(isdigit(*p))
2596                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2597              callout_fail_count = 0;
2598              if (*p == '!')
2599                {
2600                p++;
2601                while(isdigit(*p))
2602                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2603                }
2604              }
2605            else if (*p == '*')
2606              {
2607              int sign = 1;
2608              callout_data = 0;
2609              if (*(++p) == '-') { sign = -1; p++; }
2610              while(isdigit(*p))
2611                callout_data = callout_data * 10 + *p++ - '0';
2612              callout_data *= sign;
2613              callout_data_set = 1;
2614              }
2615            continue;
2616    
2617    #if !defined NODFA
2618            case 'D':
2619    #if !defined NOPOSIX
2620            if (posix || do_posix)
2621              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2622            else
2623    #endif
2624              use_dfa = 1;
2625            continue;
2626    #endif
2627    
2628    #if !defined NODFA
2629            case 'F':
2630            options |= PCRE_DFA_SHORTEST;
2631          continue;          continue;
2632    #endif
2633    
2634          case 'G':          case 'G':
2635            if (isdigit(*p))
2636              {
2637              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2638              getstrings |= 1 << n;
2639              }
2640            else if (isalnum(*p))
2641              {
2642              pcre_uchar *npp = getnamesptr;
2643              while (isalnum(*p)) *npp++ = *p++;
2644              *npp++ = 0;
2645              *npp = 0;
2646              n = pcre_get_stringnumber(re, (char *)getnamesptr);
2647              if (n < 0)
2648                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2649              getnamesptr = npp;
2650              }
2651            continue;
2652    
2653            case 'J':
2654          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
2655          getstrings |= 1 << n;          if (extra != NULL
2656                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2657                && extra->executable_jit != NULL)
2658              {
2659              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2660              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2661              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2662              }
2663          continue;          continue;
2664    
2665          case 'L':          case 'L':
2666          getlist = 1;          getlist = 1;
2667          continue;          continue;
2668    
2669            case 'M':
2670            find_match_limit = 1;
2671            continue;
2672    
2673          case 'N':          case 'N':
2674          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2675              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2676            else
2677              options |= PCRE_NOTEMPTY;
2678          continue;          continue;
2679    
2680          case 'O':          case 'O':
# Line 1013  while (!done) Line 2683  while (!done)
2683            {            {
2684            size_offsets_max = n;            size_offsets_max = n;
2685            free(offsets);            free(offsets);
2686            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2687            if (offsets == NULL)            if (offsets == NULL)
2688              {              {
2689              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2690                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2691              return 1;              yield = 1;
2692                goto EXIT;
2693              }              }
2694            }            }
2695          use_size_offsets = n;          use_size_offsets = n;
2696          if (n == 0) use_offsets = NULL;          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2697            continue;
2698    
2699            case 'P':
2700            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2701              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2702            continue;
2703    
2704            case 'Q':
2705            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2706            if (extra == NULL)
2707              {
2708              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2709              extra->flags = 0;
2710              }
2711            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2712            extra->match_limit_recursion = n;
2713            continue;
2714    
2715            case 'q':
2716            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2717            if (extra == NULL)
2718              {
2719              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2720              extra->flags = 0;
2721              }
2722            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2723            extra->match_limit = n;
2724            continue;
2725    
2726    #if !defined NODFA
2727            case 'R':
2728            options |= PCRE_DFA_RESTART;
2729            continue;
2730    #endif
2731    
2732            case 'S':
2733            show_malloc = 1;
2734            continue;
2735    
2736            case 'Y':
2737            options |= PCRE_NO_START_OPTIMIZE;
2738          continue;          continue;
2739    
2740          case 'Z':          case 'Z':
2741          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2742          continue;          continue;
2743    
2744            case '?':
2745            options |= PCRE_NO_UTF8_CHECK;
2746            continue;
2747    
2748            case '<':
2749              {
2750              int x = check_newline(p, outfile);
2751              if (x == 0) goto NEXT_DATA;
2752              options |= x;
2753              while (*p++ != '>');
2754              }
2755            continue;
2756          }          }
2757        *q++ = c;        *q++ = c;
2758        }        }
2759      *q = 0;      *q = 0;
2760      len = q - dbuffer;      len = (int)(q - dbuffer);
2761    
2762        /* Move the data to the end of the buffer so that a read over the end of
2763        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2764        we are using the POSIX interface, we must include the terminating zero. */
2765    
2766    #if !defined NOPOSIX
2767        if (posix || do_posix)
2768          {
2769          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2770          bptr += buffer_size - len - 1;
2771          }
2772        else
2773    #endif
2774          {
2775          memmove(bptr + buffer_size - len, bptr, len);
2776          bptr += buffer_size - len;
2777          }
2778    
2779        if ((all_use_dfa || use_dfa) && find_match_limit)
2780          {
2781          printf("**Match limit not relevant for DFA matching: ignored\n");
2782          find_match_limit = 0;
2783          }
2784    
2785      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2786      support timing. */      support timing or playing with the match limit or callout data. */
2787    
2788  #if !defined NOPOSIX  #if !defined NOPOSIX
2789      if (posix || do_posix)      if (posix || do_posix)
2790        {        {
2791        int rc;        int rc;
2792        int eflags = 0;        int eflags = 0;
2793        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
2794          if (use_size_offsets > 0)
2795            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2796        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2797        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2798          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2799    
2800        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2801    
2802        if (rc != 0)        if (rc != 0)
2803          {          {
2804          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2805          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2806          }          }
2807          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2808                  != 0)
2809            {
2810            fprintf(outfile, "Matched with REG_NOSUB\n");
2811            }
2812        else        else
2813          {          {
2814          size_t i;          size_t i;
2815          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2816            {            {
2817            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2818              {              {
2819              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
2820              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2821                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2822              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2823              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
2824                {                {
2825                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
2826                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2827                    outfile);
2828                fprintf(outfile, "\n");                fprintf(outfile, "\n");
2829                }                }
2830              }              }
# Line 1083  while (!done) Line 2840  while (!done)
2840    
2841      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2842        {        {
2843        if (timeit)        markptr = NULL;
2844    
2845          if (timeitm > 0)
2846          {          {
2847          register int i;          register int i;
2848          clock_t time_taken;          clock_t time_taken;
2849          clock_t start_time = clock();          clock_t start_time = clock();
2850          for (i = 0; i < LOOPREPEAT; i++)  
2851    #ifdef SUPPORT_PCRE16
2852            if (use_pcre16) len = to16(bptr, options & PCRE_UTF8);
2853    #endif
2854    
2855    
2856    #if !defined NODFA
2857            if (all_use_dfa || use_dfa)
2858              {
2859              int workspace[1000];
2860              for (i = 0; i < timeitm; i++)
2861                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2862                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2863                  sizeof(workspace)/sizeof(int));
2864              }
2865            else
2866    #endif
2867    
2868            for (i = 0; i < timeitm; i++)
2869            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2870              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2871    
2872          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2873          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2874            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2875            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2876            }
2877    
2878          /* If find_match_limit is set, we want to do repeated matches with
2879          varying limits in order to find the minimum value for the match limit and
2880          for the recursion limit. The match limits are relevant only to the normal
2881          running of pcre_exec(), so disable the JIT optimization. This makes it
2882          possible to run the same set of tests with and without JIT externally
2883          requested. */
2884    
2885          if (find_match_limit)
2886            {
2887            if (extra == NULL)
2888              {
2889              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2890              extra->flags = 0;
2891              }
2892            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2893    
2894            (void)check_match_limit(re, extra, bptr, len, start_offset,
2895              options|g_notempty, use_offsets, use_size_offsets,
2896              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2897              PCRE_ERROR_MATCHLIMIT, "match()");
2898    
2899            count = check_match_limit(re, extra, bptr, len, start_offset,
2900              options|g_notempty, use_offsets, use_size_offsets,
2901              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2902              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2903          }          }
2904    
2905        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
2906          start_offset, options | g_notempty, use_offsets, use_size_offsets);  
2907          else if (callout_data_set)
2908            {
2909            if (extra == NULL)
2910              {
2911              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2912              extra->flags = 0;
2913              }
2914            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2915            extra->callout_data = &callout_data;
2916            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2917              options | g_notempty, use_offsets, use_size_offsets);
2918            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2919            }
2920    
2921          /* The normal case is just to do the match once, with the default
2922          value of match_limit. */
2923    
2924    #if !defined NODFA
2925          else if (all_use_dfa || use_dfa)
2926            {
2927            int workspace[1000];
2928            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2929              options | g_notempty, use_offsets, use_size_offsets, workspace,
2930              sizeof(workspace)/sizeof(int));
2931            if (count == 0)
2932              {
2933              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2934              count = use_size_offsets/2;
2935              }
2936            }
2937    #endif
2938    
2939        if (count == 0)        else
2940          {          {
2941          fprintf(outfile, "Matched, but too many substrings\n");          if (use_pcre16)
2942          count = use_size_offsets/3;            count = pcre16_exec(re, extra, (PCRE_SPTR16)buffer16, len,
2943                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2944            else
2945              count = pcre_exec(re, extra, (char *)bptr, len,
2946                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2947            if (count == 0)
2948              {
2949              fprintf(outfile, "Matched, but too many substrings\n");
2950              count = use_size_offsets/3;
2951              }
2952          }          }
2953    
2954        /* Matched */        /* Matched */
2955    
2956        if (count >= 0)        if (count >= 0)
2957          {          {
2958          int i;          int i, maxcount;
2959    
2960    #if !defined NODFA
2961            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2962    #endif
2963              maxcount = use_size_offsets/3;
2964    
2965            /* This is a check against a lunatic return value. */
2966    
2967            if (count > maxcount)
2968              {
2969              fprintf(outfile,
2970                "** PCRE error: returned count %d is too big for offset size %d\n",
2971                count, use_size_offsets);
2972              count = use_size_offsets/3;
2973              if (do_g || do_G)
2974                {
2975                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2976                do_g = do_G = FALSE;        /* Break g/G loop */
2977                }
2978              }
2979    
2980            /* do_allcaps requests showing of all captures in the pattern, to check
2981            unset ones at the end. */
2982    
2983            if (do_allcaps)
2984              {
2985              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2986              count++;   /* Allow for full match */
2987              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2988              }
2989    
2990            /* Output the captured substrings */
2991    
2992          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2993            {            {
2994            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
2995                {
2996                if (use_offsets[i] != -1)
2997                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2998                    use_offsets[i], i);
2999                if (use_offsets[i+1] != -1)
3000                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3001                    use_offsets[i+1], i+1);
3002              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3003                }
3004            else            else
3005              {              {
3006              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3007              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
3008                  use_offsets[i+1] - use_offsets[i], outfile);
3009              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3010              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3011                {                {
3012                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3013                  {                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
3014                  fprintf(outfile, " 0+ ");                  outfile);
3015                  pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);                fprintf(outfile, "\n");
                 fprintf(outfile, "\n");  
                 }  
3016                }                }
3017              }              }
3018            }            }
3019    
3020            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3021    
3022          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3023            {            {
3024            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
3025              {              {
3026              char copybuffer[16];              char copybuffer[256];
3027              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3028                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
3029              if (rc < 0)              if (rc < 0)
# Line 1146  while (!done) Line 3033  while (!done)
3033              }              }
3034            }            }
3035    
3036            for (copynamesptr = copynames;
3037                 *copynamesptr != 0;
3038                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3039              {
3040              char copybuffer[256];
3041              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3042                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3043              if (rc < 0)
3044                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3045              else
3046                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3047              }
3048    
3049          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3050            {            {
3051            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1158  while (!done) Line 3058  while (!done)
3058              else              else
3059                {                {
3060                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
3061                pcre_free_substring(substring);                pcre_free_substring(substring);
3062                }                }
3063              }              }
3064            }            }
3065    
3066            for (getnamesptr = getnames;
3067                 *getnamesptr != 0;
3068                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3069              {
3070              const char *substring;
3071              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3072                count, (char *)getnamesptr, &substring);
3073              if (rc < 0)
3074                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3075              else
3076                {
3077                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
3078                pcre_free_substring(substring);
3079                }
3080              }
3081    
3082          if (getlist)          if (getlist)
3083            {            {
3084            const char **stringlist;            const char **stringlist;
# Line 1177  while (!done) Line 3092  while (!done)
3092                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3093              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
3094                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
3095              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
3096              }              }
3097            }            }
3098          }          }
3099    
3100          /* There was a partial match */
3101    
3102          else if (count == PCRE_ERROR_PARTIAL)
3103            {
3104            if (markptr == NULL) fprintf(outfile, "Partial match");
3105              else fprintf(outfile, "Partial match, mark=%s", markptr);
3106            if (use_size_offsets > 1)
3107              {
3108              fprintf(outfile, ": ");
3109              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3110                outfile);
3111              }
3112            fprintf(outfile, "\n");
3113            break;  /* Out of the /g loop */
3114            }
3115    
3116        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
3117        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
3118        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
3119        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
3120        was checked before setting g_notempty. */  
3121          Complication arises in the case when the newline convention is "any",
3122          "crlf", or "anycrlf". If the previous match was at the end of a line
3123          terminated by CRLF, an advance of one character just passes the \r,
3124          whereas we should prefer the longer newline sequence, as does the code in
3125          pcre_exec(). Fudge the offset value to achieve this. We check for a
3126          newline setting in the pattern; if none was set, use pcre_config() to
3127          find the default.
3128    
3129          Otherwise, in the case of UTF-8 matching, the advance must be one
3130          character, not one byte. */
3131    
3132        else        else
3133          {          {
3134          if (g_notempty != 0)          if (g_notempty != 0)
3135            {            {
3136              int onechar = 1;
3137              unsigned int obits = ((real_pcre *)re)->options;
3138            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
3139            use_offsets[1] = start_offset + 1;            if ((obits & PCRE_NEWLINE_BITS) == 0)
3140                {
3141                int d;
3142                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3143                /* Note that these values are always the ASCII ones, even in
3144                EBCDIC environments. CR = 13, NL = 10. */
3145                obits = (d == 13)? PCRE_NEWLINE_CR :
3146                        (d == 10)? PCRE_NEWLINE_LF :
3147                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3148                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
3149                        (d == -1)? PCRE_NEWLINE_ANY : 0;
3150                }
3151              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3152                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3153                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3154                  &&
3155                  start_offset < len - 1 &&
3156                  bptr[start_offset] == '\r' &&
3157                  bptr[start_offset+1] == '\n')
3158                onechar++;
3159              else if (use_utf8)
3160                {
3161                while (start_offset + onechar < len)
3162                  {
3163                  if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3164                  onechar++;
3165                  }
3166                }
3167              use_offsets[1] = start_offset + onechar;
3168            }            }
3169          else          else
3170            {            {
3171            if (gmatched == 0)   /* Error if no previous matches */            switch(count)
3172              {              {
3173              if (count == -1) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3174                else fprintf(outfile, "Error %d\n", count);              if (gmatched == 0)
3175                  {
3176                  if (markptr == NULL) fprintf(outfile, "No match\n");
3177                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3178                  }
3179                break;
3180    
3181                case PCRE_ERROR_BADUTF8:
3182                case PCRE_ERROR_SHORTUTF8:
3183                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3184                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3185                if (use_size_offsets >= 2)
3186                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3187                    use_offsets[1]);
3188                fprintf(outfile, "\n");
3189                break;
3190    
3191                default:
3192                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3193                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3194                else
3195                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3196                break;
3197              }              }
3198    
3199            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3200            }            }
3201          }          }
# Line 1212  while (!done) Line 3205  while (!done)
3205        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3206    
3207        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3208        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3209        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
3210        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3211        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3212        character. */        character. */
3213    
3214        g_notempty = 0;        g_notempty = 0;
3215    
3216        if (use_offsets[0] == use_offsets[1])