/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 811 - (hide annotations) (download)
Mon Dec 19 14:05:44 2011 UTC (16 months, 4 weeks ago) by zherczeg
File MIME type: text/plain
File size: 104969 byte(s)
byte flip moved before disk write in pcretest and typo fixes
1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 808 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39    
40 ph10 200 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 200 #endif
43 ph10 199
44 nigel 3 #include <ctype.h>
45     #include <stdio.h>
46     #include <string.h>
47     #include <stdlib.h>
48     #include <time.h>
49 nigel 25 #include <locale.h>
50 nigel 75 #include <errno.h>
51 nigel 3
52 ph10 287 #ifdef SUPPORT_LIBREADLINE
53 ph10 343 #ifdef HAVE_UNISTD_H
54 ph10 287 #include <unistd.h>
55 ph10 343 #endif
56 ph10 287 #include <readline/readline.h>
57     #include <readline/history.h>
58     #endif
59 nigel 93
60 ph10 287
/* A number of things vary for Windows builds. Originally, pcretest opened its
input and output without "b"; then I was told that "b" was needed in some
environments, so it was added for release 5.0 to both the input and output. (It
makes no difference on Unix-like systems.) Later I was told that it is wrong
for the input on Windows. I've now abstracted the modes into two macros that
are set here, to make it easier to fiddle with them, and removed "b" from the
input mode under Windows. */

#if defined(_WIN32) || defined(WIN32)
#include <io.h>                /* For _setmode() */
#include <fcntl.h>             /* For _O_BINARY */
#define INPUT_MODE   "r"
#define OUTPUT_MODE  "wb"

#ifndef isatty
#define isatty _isatty         /* This is what Windows calls them, I'm told, */
#endif                         /* though in some environments they seem to */
                               /* be already defined, hence the #ifndefs. */
#ifndef fileno
#define fileno _fileno
#endif

/* A user sent this fix for Borland Builder 5 under Windows. */

#ifdef __BORLANDC__
#define _setmode(handle, mode) setmode(handle, mode)
#endif

/* Not Windows */

#else
#include <sys/time.h>          /* These two includes are needed */
#include <sys/resource.h>      /* for setrlimit(). */
#define INPUT_MODE   "rb"
#define OUTPUT_MODE  "wb"
#endif
97    
98 nigel 93
99 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
100     displaying the results of pcre_study() and we also need to know about the
101     internal macros, structures, and other internal data values; pcretest has
102     "inside information" compared to a program that strictly follows the PCRE API.
103 nigel 37
104 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106     appropriately for an application, not for building PCRE. */
107 nigel 77
108 ph10 145 #include "pcre.h"
109 nigel 77 #include "pcre_internal.h"
110    
/* The pcre_printint() function, which prints the internal form of a compiled
regex, is held in a separate file so that (a) it can be compiled in either
8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
when that is compiled in debug mode. Only the prototypes for the modes that
are built are declared here. */

#ifdef SUPPORT_PCRE8
void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
#ifdef SUPPORT_PCRE16
void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
#endif
122    
/* We need access to some of the data tables that PCRE uses. So as not to have
to keep two copies, we include the source file here, changing the names of the
external symbols to prevent clashes. Each macro below maps a library symbol
name onto the shorter local name that the rest of this file uses. */

#define _pcre_ucp_gentype      ucp_gentype
#define _pcre_ucp_typerange    ucp_typerange
#define _pcre_utf8_table1      utf8_table1
#define _pcre_utf8_table1_size utf8_table1_size
#define _pcre_utf8_table2      utf8_table2
#define _pcre_utf8_table3      utf8_table3
#define _pcre_utf8_table4      utf8_table4
#define _pcre_utt              utt
#define _pcre_utt_size         utt_size
#define _pcre_utt_names        utt_names
#define _pcre_OP_lengths       OP_lengths
138    
139     #include "pcre_tables.c"
140    
/* The definition of the macro PRINTABLE, which determines whether to print an
output character as-is or as a hex value when showing compiled patterns, is
the same as in the printint.src file. We use it here in cases when the locale
has not been explicitly changed, so as to get consistent output from systems
that differ in their output from isprint() even in the "C" locale. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK consults isprint() only when the variable locale_set is non-zero;
otherwise it uses the fixed PRINTABLE range. Note that the argument is
evaluated more than once, so it must not have side effects. */

#define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154 nigel 85
155 nigel 37 /* It is possible to compile this test program without including support for
156     testing the POSIX interface, though this is not available via the standard
157     Makefile. */
158    
159     #if !defined NOPOSIX
160 nigel 3 #include "pcreposix.h"
161 nigel 37 #endif
162 nigel 3
/* It is also possible, originally for the benefit of a version that was
imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
without the interface to the DFA matcher (NODFA), and without the doublecheck
of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
out the UTF8 support if PCRE is built without it. */

#if !defined SUPPORT_UTF8 && !defined NOUTF8
#define NOUTF8
#endif
174 nigel 79
/* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
only from one place and is handled differently). I couldn't dream up any way of
using a single macro to do this in a generic way, because of the many different
argument requirements. We know that at least one of SUPPORT_PCRE8 and
SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
use these in the definitions of generic macros.

Macros that take a leading "lv", "re", "count" or "extra" argument assign the
function result to it; each expands to a single expression statement without a
trailing semicolon.

NOTE(review): pointer arguments are pasted directly after the cast without
parentheses, so an argument such as "ptr + n" is cast first and then offset in
units of the cast type. Callers may rely on that - confirm before adding
parentheses. */

#ifdef SUPPORT_PCRE8

#define PCHARS8(lv, p, len, f) \
  lv = pchars((pcre_uint8 *)p, len, f)

#define PCHARSV8(p, len, f) \
  (void)pchars((pcre_uint8 *)p, len, f)

#define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
  re = pcre_compile((char *)pat, options, error, erroffset, tables)

#define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
    offsets, size_offsets)

#define PCRE_STUDY8(extra, re, options, error) \
  extra = pcre_study(re, options, error)

#define PCRE_FREE_STUDY8(extra) \
  pcre_free_study(extra)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
  pcre_pattern_to_host_byte_order(re, extra, tables)

#endif /* SUPPORT_PCRE8 */
208    
209    
/* 16-bit counterparts of the per-mode call macros. Each one forwards to the
pcre16_xxx function (or to pchars16) and casts string arguments to PCRE_SPTR16.

NOTE(review): as in the 8-bit versions, the pointer argument follows the cast
without parentheses, so "ptr + n" is cast first and then offset in units of
the cast type - confirm callers before adding parentheses. */

#ifdef SUPPORT_PCRE16

#define PCHARS16(lv, p, len, f) \
  lv = pchars16((PCRE_SPTR16)p, len, f)

#define PCHARSV16(p, len, f) \
  (void)pchars16((PCRE_SPTR16)p, len, f)

#define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
  re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)

#define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
    options, offsets, size_offsets)

#define PCRE_STUDY16(extra, re, options, error) \
  extra = pcre16_study(re, options, error)

#define PCRE_FREE_STUDY16(extra) \
  pcre16_free_study(extra)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
  pcre16_pattern_to_host_byte_order(re, extra, tables)

#endif /* SUPPORT_PCRE16 */
235    
236    
/* Generic call macros. Each one resolves to the 8-bit or the 16-bit variant.
When both modes are built the choice is made at run time by testing the
variable use_pcre16; otherwise the generic name is simply an alias for the
only variant available. The run-time forms expand to an if/else statement
without a trailing semicolon, so they must be used as complete statements. */

/* ----- Both modes are supported; a runtime test is needed ----- */

#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16

#define PCHARS(lv, p, len, f) \
  if (use_pcre16) \
    PCHARS16(lv, p, len, f); \
  else \
    PCHARS8(lv, p, len, f)

#define PCHARSV(p, len, f) \
  if (use_pcre16) \
    PCHARSV16(p, len, f); \
  else \
    PCHARSV8(p, len, f)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  if (use_pcre16) \
    PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
  else \
    PCRE_COMPILE8(re, pat, options, error, erroffset, tables)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  if (use_pcre16) \
    PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets); \
  else \
    PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
      offsets, size_offsets)

#define PCRE_FREE_STUDY(extra) \
  if (use_pcre16) \
    PCRE_FREE_STUDY16(extra); \
  else \
    PCRE_FREE_STUDY8(extra)

#define PCRE_STUDY(extra, re, options, error) \
  if (use_pcre16) \
    PCRE_STUDY16(extra, re, options, error); \
  else \
    PCRE_STUDY8(extra, re, options, error)

#define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
  if (use_pcre16) \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
  else \
    PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)

/* ----- Only 8-bit mode is supported ----- */

#elif defined SUPPORT_PCRE8
#define PCHARS                           PCHARS8
#define PCHARSV                          PCHARSV8
#define PCRE_COMPILE                     PCRE_COMPILE8
#define PCRE_EXEC                        PCRE_EXEC8
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY8
#define PCRE_STUDY                       PCRE_STUDY8
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER8

/* ----- Only 16-bit mode is supported ----- */

#else
#define PCHARS                           PCHARS16
#define PCHARSV                          PCHARSV16
#define PCRE_COMPILE                     PCRE_COMPILE16
#define PCRE_EXEC                        PCRE_EXEC16
#define PCRE_FREE_STUDY                  PCRE_FREE_STUDY16
#define PCRE_STUDY                       PCRE_STUDY16
#define PCRE_PATTERN_TO_HOST_BYTE_ORDER  PCRE_PATTERN_TO_HOST_BYTE_ORDER16
#endif

/* ----- End of mode-specific function call macros ----- */
310    
311    
/* Other parameters */

/* Supply CLOCKS_PER_SEC when <time.h> does not: use CLK_TCK if that exists,
otherwise fall back to 100. */

#ifndef CLOCKS_PER_SEC
#ifdef CLK_TCK
#define CLOCKS_PER_SEC CLK_TCK
#else
#define CLOCKS_PER_SEC 100
#endif
#endif

/* This is the default loop count for timing. */

#define LOOPREPEAT 500000
325 nigel 3
326 nigel 85 /* Static variables */
327    
328 nigel 3 static FILE *outfile;
329     static int log_store = 0;
330 nigel 63 static int callout_count;
331     static int callout_extra;
332     static int callout_fail_count;
333     static int callout_fail_id;
334 ph10 210 static int debug_lengths;
335 nigel 63 static int first_callout;
336 nigel 93 static int locale_set = 0;
337 nigel 73 static int show_malloc;
338 ph10 810 static int use_utf;
339 nigel 43 static size_t gotten_store;
340 ph10 801 static size_t first_gotten_store = 0;
341 ph10 645 static const unsigned char *last_callout_mark = NULL;
342 nigel 3
343 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
344    
345     static int buffer_size = 50000;
346 ph10 756 static pcre_uint8 *buffer = NULL;
347     static pcre_uint8 *dbuffer = NULL;
348     static pcre_uint8 *pbuffer = NULL;
349 nigel 3
350 ph10 805 #ifdef SUPPORT_PCRE16
351     static int buffer16_size = 0;
352     static pcre_uint16 *buffer16 = NULL;
353     #endif
354    
/* If we have 8-bit support, default use_pcre16 to false; if there is also
16-bit support, it can be changed by an option. If there is no 8-bit support,
there must be 16-bit support, so default it to 1. */

#ifdef SUPPORT_PCRE8
static int use_pcre16 = 0;
#else
static int use_pcre16 = 1;
#endif
364    
/* Textual explanations for runtime error codes. A NULL entry marks a code
that has no generic message here (see the per-entry comments).

NOTE(review): the entries appear to be indexed by the negated PCRE error code
(entry 0 is "no error", entry 1 is NOMATCH) - confirm against the code that
looks messages up in this table. */

static const char *errtexts[] = {
  NULL,  /* 0 is no error */
  NULL,  /* NOMATCH is handled specially */
  "NULL argument passed",
  "bad option value",
  "magic number missing",
  "unknown opcode - pattern overwritten?",
  "no more memory",
  NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
  "match limit exceeded",
  "callout error code",
  NULL,  /* BADUTF8 is handled specially */
  "bad UTF-8 offset",
  NULL,  /* PARTIAL is handled specially */
  "not used - internal error",
  "internal error - pattern overwritten?",
  "bad count value",
  "item unsupported for DFA matching",
  "backreference condition or recursion test not supported for DFA matching",
  "match limit not supported for DFA matching",
  "workspace size exceeded in DFA matching",
  "too much recursion for DFA matching",
  "recursion limit exceeded",
  "not used - internal error",
  "invalid combination of newline options",
  "bad offset value",
  NULL,  /* SHORTUTF8 is handled specially */
  "nested recursion at the same subject position",
  "JIT stack limit reached",
  "pattern compiled in wrong mode (8-bit/16-bit error)"
};
398    
399 ph10 654
400 ph10 541 /*************************************************
401     * Alternate character tables *
402     *************************************************/
403 nigel 49
404 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
405     using the default tables of the library. However, the T option can be used to
406     select alternate sets of tables, for different kinds of testing. Note also that
407 ph10 541 the L (locale) option also adjusts the tables. */
408    
409 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
410 ph10 541 only ASCII characters. */
411    
412 ph10 808 static const pcre_uint8 tables0[] = {
413 ph10 541
414     /* This table is a lower casing table. */
415    
416     0, 1, 2, 3, 4, 5, 6, 7,
417     8, 9, 10, 11, 12, 13, 14, 15,
418     16, 17, 18, 19, 20, 21, 22, 23,
419     24, 25, 26, 27, 28, 29, 30, 31,
420     32, 33, 34, 35, 36, 37, 38, 39,
421     40, 41, 42, 43, 44, 45, 46, 47,
422     48, 49, 50, 51, 52, 53, 54, 55,
423     56, 57, 58, 59, 60, 61, 62, 63,
424     64, 97, 98, 99,100,101,102,103,
425     104,105,106,107,108,109,110,111,
426     112,113,114,115,116,117,118,119,
427     120,121,122, 91, 92, 93, 94, 95,
428     96, 97, 98, 99,100,101,102,103,
429     104,105,106,107,108,109,110,111,
430     112,113,114,115,116,117,118,119,
431     120,121,122,123,124,125,126,127,
432     128,129,130,131,132,133,134,135,
433     136,137,138,139,140,141,142,143,
434     144,145,146,147,148,149,150,151,
435     152,153,154,155,156,157,158,159,
436     160,161,162,163,164,165,166,167,
437     168,169,170,171,172,173,174,175,
438     176,177,178,179,180,181,182,183,
439     184,185,186,187,188,189,190,191,
440     192,193,194,195,196,197,198,199,
441     200,201,202,203,204,205,206,207,
442     208,209,210,211,212,213,214,215,
443     216,217,218,219,220,221,222,223,
444     224,225,226,227,228,229,230,231,
445     232,233,234,235,236,237,238,239,
446     240,241,242,243,244,245,246,247,
447     248,249,250,251,252,253,254,255,
448    
449     /* This table is a case flipping table. */
450    
451     0, 1, 2, 3, 4, 5, 6, 7,
452     8, 9, 10, 11, 12, 13, 14, 15,
453     16, 17, 18, 19, 20, 21, 22, 23,
454     24, 25, 26, 27, 28, 29, 30, 31,
455     32, 33, 34, 35, 36, 37, 38, 39,
456     40, 41, 42, 43, 44, 45, 46, 47,
457     48, 49, 50, 51, 52, 53, 54, 55,
458     56, 57, 58, 59, 60, 61, 62, 63,
459     64, 97, 98, 99,100,101,102,103,
460     104,105,106,107,108,109,110,111,
461     112,113,114,115,116,117,118,119,
462     120,121,122, 91, 92, 93, 94, 95,
463     96, 65, 66, 67, 68, 69, 70, 71,
464     72, 73, 74, 75, 76, 77, 78, 79,
465     80, 81, 82, 83, 84, 85, 86, 87,
466     88, 89, 90,123,124,125,126,127,
467     128,129,130,131,132,133,134,135,
468     136,137,138,139,140,141,142,143,
469     144,145,146,147,148,149,150,151,
470     152,153,154,155,156,157,158,159,
471     160,161,162,163,164,165,166,167,
472     168,169,170,171,172,173,174,175,
473     176,177,178,179,180,181,182,183,
474     184,185,186,187,188,189,190,191,
475     192,193,194,195,196,197,198,199,
476     200,201,202,203,204,205,206,207,
477     208,209,210,211,212,213,214,215,
478     216,217,218,219,220,221,222,223,
479     224,225,226,227,228,229,230,231,
480     232,233,234,235,236,237,238,239,
481     240,241,242,243,244,245,246,247,
482     248,249,250,251,252,253,254,255,
483    
484     /* This table contains bit maps for various character classes. Each map is 32
485     bytes long and the bits run from the least significant end of each byte. The
486     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
487     graph, print, punct, and cntrl. Other classes are built from combinations. */
488    
489     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
490     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
492     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493    
494     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
495     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
496     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
497     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498    
499     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
500     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
501     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
502     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503    
504     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
505     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
506     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
507     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508    
509     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
510     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
511     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
512     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513    
514     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
515     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
516     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
517     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518    
519     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
520     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
521     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
522     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523    
524     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
525     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
526     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
527     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
528    
529     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
530     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
531     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
532     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
533    
534     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
535     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
536     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
537     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
538    
539     /* This table identifies various classes of character by individual bits:
540     0x01 white space character
541     0x02 letter
542     0x04 decimal digit
543     0x08 hexadecimal digit
544     0x10 alphanumeric or '_'
545     0x80 regular expression metacharacter or binary zero
546     */
547    
548     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
549     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
550     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
551     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
552     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
553     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
554     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
555     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
556     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
557     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
558     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
559     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
560     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
561     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
562     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
563     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
564     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
565     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
566     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
567     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
568     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
569     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
570     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
571     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
572     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
573     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
574     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
575     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
576     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
577     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
578     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
579     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
580    
581 ph10 545 /* This is a set of tables that came orginally from a Windows user. It seems to
582     be at least an approximation of ISO 8859. In particular, there are characters
583 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
584    
585 ph10 808 static const pcre_uint8 tables1[] = {
586 ph10 541 0,1,2,3,4,5,6,7,
587     8,9,10,11,12,13,14,15,
588     16,17,18,19,20,21,22,23,
589     24,25,26,27,28,29,30,31,
590     32,33,34,35,36,37,38,39,
591     40,41,42,43,44,45,46,47,
592     48,49,50,51,52,53,54,55,
593     56,57,58,59,60,61,62,63,
594     64,97,98,99,100,101,102,103,
595     104,105,106,107,108,109,110,111,
596     112,113,114,115,116,117,118,119,
597     120,121,122,91,92,93,94,95,
598     96,97,98,99,100,101,102,103,
599     104,105,106,107,108,109,110,111,
600     112,113,114,115,116,117,118,119,
601     120,121,122,123,124,125,126,127,
602     128,129,130,131,132,133,134,135,
603     136,137,138,139,140,141,142,143,
604     144,145,146,147,148,149,150,151,
605     152,153,154,155,156,157,158,159,
606     160,161,162,163,164,165,166,167,
607     168,169,170,171,172,173,174,175,
608     176,177,178,179,180,181,182,183,
609     184,185,186,187,188,189,190,191,
610     224,225,226,227,228,229,230,231,
611     232,233,234,235,236,237,238,239,
612     240,241,242,243,244,245,246,215,
613     248,249,250,251,252,253,254,223,
614     224,225,226,227,228,229,230,231,
615     232,233,234,235,236,237,238,239,
616     240,241,242,243,244,245,246,247,
617     248,249,250,251,252,253,254,255,
618     0,1,2,3,4,5,6,7,
619     8,9,10,11,12,13,14,15,
620     16,17,18,19,20,21,22,23,
621     24,25,26,27,28,29,30,31,
622     32,33,34,35,36,37,38,39,
623     40,41,42,43,44,45,46,47,
624     48,49,50,51,52,53,54,55,
625     56,57,58,59,60,61,62,63,
626     64,97,98,99,100,101,102,103,
627     104,105,106,107,108,109,110,111,
628     112,113,114,115,116,117,118,119,
629     120,121,122,91,92,93,94,95,
630     96,65,66,67,68,69,70,71,
631     72,73,74,75,76,77,78,79,
632     80,81,82,83,84,85,86,87,
633     88,89,90,123,124,125,126,127,
634     128,129,130,131,132,133,134,135,
635     136,137,138,139,140,141,142,143,
636     144,145,146,147,148,149,150,151,
637     152,153,154,155,156,157,158,159,
638     160,161,162,163,164,165,166,167,
639     168,169,170,171,172,173,174,175,
640     176,177,178,179,180,181,182,183,
641     184,185,186,187,188,189,190,191,
642     224,225,226,227,228,229,230,231,
643     232,233,234,235,236,237,238,239,
644     240,241,242,243,244,245,246,215,
645     248,249,250,251,252,253,254,223,
646     192,193,194,195,196,197,198,199,
647     200,201,202,203,204,205,206,207,
648     208,209,210,211,212,213,214,247,
649     216,217,218,219,220,221,222,255,
650     0,62,0,0,1,0,0,0,
651     0,0,0,0,0,0,0,0,
652     32,0,0,0,1,0,0,0,
653     0,0,0,0,0,0,0,0,
654     0,0,0,0,0,0,255,3,
655     126,0,0,0,126,0,0,0,
656     0,0,0,0,0,0,0,0,
657     0,0,0,0,0,0,0,0,
658     0,0,0,0,0,0,255,3,
659     0,0,0,0,0,0,0,0,
660     0,0,0,0,0,0,12,2,
661     0,0,0,0,0,0,0,0,
662     0,0,0,0,0,0,0,0,
663     254,255,255,7,0,0,0,0,
664     0,0,0,0,0,0,0,0,
665     255,255,127,127,0,0,0,0,
666     0,0,0,0,0,0,0,0,
667     0,0,0,0,254,255,255,7,
668     0,0,0,0,0,4,32,4,
669     0,0,0,128,255,255,127,255,
670     0,0,0,0,0,0,255,3,
671     254,255,255,135,254,255,255,7,
672     0,0,0,0,0,4,44,6,
673     255,255,127,255,255,255,127,255,
674     0,0,0,0,254,255,255,255,
675     255,255,255,255,255,255,255,127,
676     0,0,0,0,254,255,255,255,
677     255,255,255,255,255,255,255,255,
678     0,2,0,0,255,255,255,255,
679     255,255,255,255,255,255,255,127,
680     0,0,0,0,255,255,255,255,
681     255,255,255,255,255,255,255,255,
682     0,0,0,0,254,255,0,252,
683     1,0,0,248,1,0,0,120,
684     0,0,0,0,254,255,255,255,
685     0,0,128,0,0,0,128,0,
686     255,255,255,255,0,0,0,0,
687     0,0,0,0,0,0,0,128,
688     255,255,255,255,0,0,0,0,
689     0,0,0,0,0,0,0,0,
690     128,0,0,0,0,0,0,0,
691     0,1,1,0,1,1,0,0,
692     0,0,0,0,0,0,0,0,
693     0,0,0,0,0,0,0,0,
694     1,0,0,0,128,0,0,0,
695     128,128,128,128,0,0,128,0,
696     28,28,28,28,28,28,28,28,
697     28,28,0,0,0,0,0,128,
698     0,26,26,26,26,26,26,18,
699     18,18,18,18,18,18,18,18,
700     18,18,18,18,18,18,18,18,
701     18,18,18,128,128,0,128,16,
702     0,26,26,26,26,26,26,18,
703     18,18,18,18,18,18,18,18,
704     18,18,18,18,18,18,18,18,
705     18,18,18,128,128,0,0,0,
706     0,0,0,0,0,1,0,0,
707     0,0,0,0,0,0,0,0,
708     0,0,0,0,0,0,0,0,
709     0,0,0,0,0,0,0,0,
710     1,0,0,0,0,0,0,0,
711     0,0,18,0,0,0,0,0,
712     0,0,20,20,0,18,0,0,
713     0,20,18,0,0,0,0,0,
714     18,18,18,18,18,18,18,18,
715     18,18,18,18,18,18,18,18,
716     18,18,18,18,18,18,18,0,
717     18,18,18,18,18,18,18,18,
718     18,18,18,18,18,18,18,18,
719     18,18,18,18,18,18,18,18,
720     18,18,18,18,18,18,18,0,
721     18,18,18,18,18,18,18,18
722     };
723    
724    
725    
726 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
in their libraries, but can provide the same facility by this simple
alternative function.

Argument:   n    the error number to describe
Returns:    the sys_errlist entry for n, or a fixed "unknown" message when
            n is negative or not less than sys_nerr
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
if (n < 0 || n >= sys_nerr) return "unknown error number";
return sys_errlist[n];
}
#endif /* HAVE_STRERROR */
746    
747    
748 ph10 667 /*************************************************
749     * JIT memory callback *
750     *************************************************/
751 ph10 558
752 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
753     {
754     return (pcre_jit_stack *)arg;
755     }
756 ph10 558
757 ph10 667
758 ph10 808 /*************************************************
759     * Convert UTF-8 string to value *
760     *************************************************/
761    
762     /* This function takes one or more bytes that represents a UTF-8 character,
763     and returns the value of the character.
764    
765     Argument:
766     utf8bytes a pointer to the byte vector
767     vptr a pointer to an int to receive the value
768    
769     Returns: > 0 => the number of bytes consumed
770     -6 to 0 => malformed UTF-8 character at offset = (-return)
771     */
772    
773     #if !defined NOUTF8
774    
775     static int
776     utf82ord(pcre_uint8 *utf8bytes, int *vptr)
777     {
778     int c = *utf8bytes++;
779     int d = c;
780     int i, j, s;
781    
782     for (i = -1; i < 6; i++) /* i is number of additional bytes */
783     {
784     if ((d & 0x80) == 0) break;
785     d <<= 1;
786     }
787    
788     if (i == -1) { *vptr = c; return 1; } /* ascii character */
789     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
790    
791     /* i now has a value in the range 1-5 */
792    
793     s = 6*i;
794     d = (c & utf8_table3[i]) << s;
795    
796     for (j = 0; j < i; j++)
797     {
798     c = *utf8bytes++;
799     if ((c & 0xc0) != 0x80) return -(j+1);
800     s -= 6;
801     d |= (c & 0x3f) << s;
802     }
803    
804     /* Check that encoding was the correct unique one */
805    
806     for (j = 0; j < utf8_table1_size; j++)
807     if (d <= utf8_table1[j]) break;
808     if (j != i) return -(i+1);
809    
810     /* Valid value */
811    
812     *vptr = d;
813     return i+1;
814     }
815    
816     #endif
817    
818    
819    
820     /*************************************************
821     * Convert character value to UTF-8 *
822     *************************************************/
823    
824     /* This function takes an integer value in the range 0 - 0x7fffffff
825     and encodes it as a UTF-8 character in 0 to 6 bytes.
826    
827     Arguments:
828     cvalue the character value
829     utf8bytes pointer to buffer for result - at least 6 bytes long
830    
831     Returns: number of characters placed in the buffer
832     */
833    
834     #if !defined NOUTF8
835    
836     static int
837     ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
838     {
839     register int i, j;
840     for (i = 0; i < utf8_table1_size; i++)
841     if (cvalue <= utf8_table1[i]) break;
842     utf8bytes += i;
843     for (j = i; j > 0; j--)
844     {
845     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
846     cvalue >>= 6;
847     }
848     *utf8bytes = utf8_table2[i] | cvalue;
849     return i + 1;
850     }
851    
852     #endif
853    
854    
855    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is (re)allocated here when it
is too small. In non-UTF mode every byte becomes one 16-bit unit, so the space
needed is exactly double the 8-bit size. In UTF mode the space is never more
than double: a 1-, 2- or 3-byte UTF-8 character becomes one 16-bit unit, and a
4-byte character becomes a surrogate pair (two units). The extra two bytes are
for the terminating zero.

Values above 0x10ffff have no UTF-16 representation. utf82ord() can deliver
such values, so they are rejected here in the same way as malformed input
instead of being turned into meaningless surrogates.

Arguments:
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed or contains a character
               greater than 0x10ffff
*/

static int
to16(pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *pp;

if (buffer16_size < 2*len + 2)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

pp = buffer16;

if (!utf)
  {
  while (len-- > 0) *pp++ = *p++;
  }

else
  {
  int c;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);
    if (chlen <= 0) return -1;
    if (c > 0x10ffff) return -1;      /* Not representable in UTF-16 */
    p += chlen;
    len -= chlen;
    if (c < 0x10000) *pp++ = c; else
      {
      c -= 0x10000;
      *pp++ = 0xD800 | (c >> 10);     /* High surrogate */
      *pp++ = 0xDC00 | (c & 0x3ff);   /* Low surrogate */
      }
    }
  }

*pp = 0;
return pp - buffer16;
}
#endif
922    
923    
924     /*************************************************
925 nigel 91 * Read or extend an input line *
926     *************************************************/
927    
928     /* Input lines are read into buffer, but both patterns and data lines can be
929     continued over multiple input lines. In addition, if the buffer fills up, we
930     want to automatically expand it so as to be able to handle extremely large
931     lines that are needed for certain stress tests. When the input buffer is
932     expanded, the other two buffers must also be expanded likewise, and the
933     contents of pbuffer, which are a copy of the input for callouts, must be
934     preserved (for when expansion happens for a data line). This is not the most
935     optimal way of handling this, but hey, this is just a test program!
936    
937     Arguments:
938     f the file to read
939     start where in buffer to start (this *must* be within buffer)
940 ph10 287 prompt for stdin or readline()
941 nigel 91
942     Returns: pointer to the start of new data
943     could be a copy of start, or could be moved
944     NULL if no data read and EOF reached
945     */
946    
947 ph10 756 static pcre_uint8 *
948     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
949 nigel 91 {
950 ph10 756 pcre_uint8 *here = start;
951 nigel 91
952     for (;;)
953     {
954 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
955 nigel 93
956 nigel 91 if (rlen > 1000)
957     {
958     int dlen;
959 ph10 289
960 ph10 287 /* If libreadline support is required, use readline() to read a line if the
961     input is a terminal. Note that readline() removes the trailing newline, so
962     we must put it back again, to be compatible with fgets(). */
963 ph10 289
964 ph10 287 #ifdef SUPPORT_LIBREADLINE
965     if (isatty(fileno(f)))
966     {
967 ph10 289 size_t len;
968 ph10 287 char *s = readline(prompt);
969     if (s == NULL) return (here == start)? NULL : start;
970     len = strlen(s);
971 ph10 289 if (len > 0) add_history(s);
972 ph10 287 if (len > rlen - 1) len = rlen - 1;
973     memcpy(here, s, len);
974     here[len] = '\n';
975 ph10 289 here[len+1] = 0;
976     free(s);
977 ph10 287 }
978 ph10 289 else
979     #endif
980    
981 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
982 ph10 289
983 ph10 287 {
984 ph10 516 if (f == stdin) printf("%s", prompt);
985 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
986     return (here == start)? NULL : start;
987 ph10 289 }
988    
989 nigel 91 dlen = (int)strlen((char *)here);
990     if (dlen > 0 && here[dlen - 1] == '\n') return start;
991     here += dlen;
992     }
993    
994     else
995     {
996     int new_buffer_size = 2*buffer_size;
997 ph10 808 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
998     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
999     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1000 nigel 91
1001     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1002     {
1003     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
1004     exit(1);
1005     }
1006    
1007     memcpy(new_buffer, buffer, buffer_size);
1008     memcpy(new_pbuffer, pbuffer, buffer_size);
1009    
1010     buffer_size = new_buffer_size;
1011    
1012     start = new_buffer + (start - buffer);
1013     here = new_buffer + (here - buffer);
1014    
1015     free(buffer);
1016     free(dbuffer);
1017     free(pbuffer);
1018    
1019     buffer = new_buffer;
1020     dbuffer = new_dbuffer;
1021     pbuffer = new_pbuffer;
1022     }
1023     }
1024    
1025     return NULL; /* Control never gets here */
1026     }
1027    
1028    
1029    
1030     /*************************************************
1031 nigel 63 * Read number from string *
1032     *************************************************/
1033    
1034     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1035     around with conditional compilation, just do the job by hand. It is only used
1036 nigel 93 for unpicking arguments, so just keep it simple.
1037 nigel 63
1038     Arguments:
1039     str string to be converted
1040     endptr where to put the end pointer
1041    
1042     Returns: the unsigned long
1043     */
1044    
1045     static int
1046 ph10 808 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1047 nigel 63 {
1048     int result = 0;
1049     while(*str != 0 && isspace(*str)) str++;
1050     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1051     *endptr = str;
1052     return(result);
1053     }
1054    
1055    
1056    
1057 ph10 810 /*************************************************
1058     * Print one character *
1059     *************************************************/
1060    
1061     /* Print a single character either literally, or as a hex escape. */
1062    
1063     static int pchar(int c, FILE *f)
1064     {
1065     if (PRINTOK(c))
1066     {
1067     if (f != NULL) fprintf(f, "%c", c);
1068     return 1;
1069     }
1070    
1071     if (c < 0x100)
1072     {
1073     if (use_utf)
1074     {
1075     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1076     return 6;
1077     }
1078     else
1079     {
1080     if (f != NULL) fprintf(f, "\\x%02x", c);
1081     return 4;
1082     }
1083     }
1084    
1085     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1086     return (c <= 0x000000ff)? 6 :
1087     (c <= 0x00000fff)? 7 :
1088     (c <= 0x0000ffff)? 8 :
1089     (c <= 0x000fffff)? 9 : 10;
1090     }
1091    
1092    
1093    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Must handle UTF-8 strings in utf8 mode. Each character is passed to pchar()
for output. A byte sequence that does not decode, or that would extend past
the end of the string, is output one byte at a time.

Arguments:
  p          points to the string
  length     number of bytes in the string
  f          the output file, or NULL just to count

Returns:     number of characters printed (the sum of the pchar() yields)
*/

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int count = 0;

while (length > 0)
  {
  int used = 0;    /* Bytes taken by a successful UTF-8 decode, else 0 */

#if !defined NOUTF8
  if (use_utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= length) used = rc;   /* Mustn't run over the end */
    }
#endif

  if (used == 0)   /* Not UTF, or not decodable: take a single byte */
    {
    c = *p;
    used = 1;
    }

  p += used;
  length -= used;
  count += pchar(c, f);
  }

return count;
}
#endif
1129 nigel 23
1130 nigel 3
1131 nigel 23
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. A high surrogate (0xD800-0xDBFF)
that is immediately followed by a low surrogate (0xDC00-0xDFFF, both ends
included) is combined into a single character; anything else is output as it
stands.

Arguments:
  p          points to the string
  length     number of 16-bit units in the string
  f          the output file, or NULL just to count

Returns:     number of characters printed (the sum of the pchar() yields)
*/

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF8
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;
    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif
1165    
1166    
1167    
1168     /*************************************************
1169 nigel 63 * Callout function *
1170     *************************************************/
1171 nigel 3
1172 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1173     the match. Yield zero unless more callouts than the fail count, or the callout
1174     data is not zero. */
1175 nigel 3
1176 nigel 63 static int callout(pcre_callout_block *cb)
1177     {
1178     FILE *f = (first_callout | callout_extra)? outfile : NULL;
1179 nigel 75 int i, pre_start, post_start, subject_length;
1180 nigel 3
1181 nigel 63 if (callout_extra)
1182     {
1183     fprintf(f, "Callout %d: last capture = %d\n",
1184     cb->callout_number, cb->capture_last);
1185 nigel 3
1186 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
1187     {
1188     if (cb->offset_vector[i] < 0)
1189     fprintf(f, "%2d: <unset>\n", i/2);
1190     else
1191     {
1192     fprintf(f, "%2d: ", i/2);
1193 ph10 808 PCHARSV(cb->subject + cb->offset_vector[i],
1194 nigel 63 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1195     fprintf(f, "\n");
1196     }
1197     }
1198     }
1199 nigel 3
1200 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
1201     datails. On subsequent calls in the same match, we use pchars just to find the
1202     printed lengths of the substrings. */
1203 nigel 3
1204 nigel 63 if (f != NULL) fprintf(f, "--->");
1205 nigel 3
1206 ph10 808 PCHARS(pre_start, cb->subject, cb->start_match, f);
1207     PCHARS(post_start, cb->subject + cb->start_match,
1208 nigel 63 cb->current_position - cb->start_match, f);
1209 nigel 3
1210 ph10 808 PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1211 nigel 75
1212 ph10 808 PCHARSV(cb->subject + cb->current_position,
1213 nigel 63 cb->subject_length - cb->current_position, f);
1214 nigel 3
1215 nigel 63 if (f != NULL) fprintf(f, "\n");
1216 nigel 9
1217 nigel 63 /* Always print appropriate indicators, with callout number if not already
1218 nigel 75 shown. For automatic callouts, show the pattern offset. */
1219 nigel 3
1220 nigel 75 if (cb->callout_number == 255)
1221     {
1222     fprintf(outfile, "%+3d ", cb->pattern_position);
1223     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1224     }
1225     else
1226     {
1227     if (callout_extra) fprintf(outfile, " ");
1228     else fprintf(outfile, "%3d ", cb->callout_number);
1229     }
1230 nigel 3
1231 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1232     fprintf(outfile, "^");
1233 nigel 3
1234 nigel 63 if (post_start > 0)
1235     {
1236     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1237     fprintf(outfile, "^");
1238 nigel 3 }
1239    
1240 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1241     fprintf(outfile, " ");
1242    
1243     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1244     pbuffer + cb->pattern_position);
1245    
1246 nigel 63 fprintf(outfile, "\n");
1247     first_callout = 0;
1248 nigel 3
1249 ph10 654 if (cb->mark != last_callout_mark)
1250 ph10 645 {
1251 ph10 654 fprintf(outfile, "Latest Mark: %s\n",
1252 ph10 645 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1253 ph10 654 last_callout_mark = cb->mark;
1254     }
1255 ph10 645
1256 nigel 71 if (cb->callout_data != NULL)
1257 nigel 49 {
1258 nigel 71 int callout_data = *((int *)(cb->callout_data));
1259     if (callout_data != 0)
1260     {
1261     fprintf(outfile, "Callout data = %d\n", callout_data);
1262     return callout_data;
1263     }
1264 nigel 63 }
1265 nigel 49
1266 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
1267     (++callout_count >= callout_fail_count)? 1 : 0;
1268 nigel 3 }
1269    
1270    
1271 nigel 63 /*************************************************
1272 nigel 73 * Local malloc functions *
1273 nigel 63 *************************************************/
1274 nigel 3
1275 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1276 ph10 801 compiled re, which is the first store request that pcre_compile() makes. The
1277     show_malloc variable is set only during matching. */
1278 nigel 3
1279     static void *new_malloc(size_t size)
1280     {
1281 nigel 73 void *block = malloc(size);
1282 nigel 43 gotten_store = size;
1283 ph10 801 if (first_gotten_store == 0) first_gotten_store = size;
1284 nigel 73 if (show_malloc)
1285 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1286 nigel 73 return block;
1287 nigel 3 }
1288    
1289 nigel 73 static void new_free(void *block)
1290     {
1291     if (show_malloc)
1292     fprintf(outfile, "free %p\n", block);
1293     free(block);
1294     }
1295 nigel 3
1296 nigel 73 /* For recursion malloc/free, to test stacking calls */
1297    
1298     static void *stack_malloc(size_t size)
1299     {
1300     void *block = malloc(size);
1301     if (show_malloc)
1302 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1303 nigel 73 return block;
1304     }
1305    
1306     static void stack_free(void *block)
1307     {
1308     if (show_malloc)
1309     fprintf(outfile, "stack_free %p\n", block);
1310     free(block);
1311     }
1312    
1313    
1314 nigel 63 /*************************************************
1315     * Call pcre_fullinfo() *
1316     *************************************************/
1317 nigel 43
1318 ph10 808 /* Get one piece of information from the pcre_fullinfo() function. When only
1319     one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1320     value, but the code is defensive. */
1321 nigel 43
1322     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1323     {
1324     int rc;
1325 ph10 808
1326     if (use_pcre16)
1327     #ifdef SUPPORT_PCRE16
1328     rc = pcre16_fullinfo(re, study, option, ptr);
1329     #else
1330     rc = PCRE_ERROR_BADMODE;
1331     #endif
1332     else
1333     #ifdef SUPPORT_PCRE8
1334     rc = pcre_fullinfo(re, study, option, ptr);
1335     #else
1336     rc = PCRE_ERROR_BADMODE;
1337     #endif
1338    
1339     if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1340     use_pcre16? "16" : "", option);
1341 nigel 43 }
1342    
1343    
1344    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value      the value to flip; only the low-order 2 (or 4) bytes are used
  n          2 to swap the two low-order bytes; any other value reverses
               the four low-order bytes

Returns:     the flipped value, with all higher-order bits zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1358    
1359    
1360    
1361    
1362     /*************************************************
1363 nigel 87 * Check match or recursion limit *
1364     *************************************************/
1365    
1366     static int
1367 ph10 756 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1368 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1369     int flag, unsigned long int *limit, int errnumber, const char *msg)
1370     {
1371     int count;
1372     int min = 0;
1373     int mid = 64;
1374     int max = -1;
1375    
1376     extra->flags |= flag;
1377    
1378     for (;;)
1379     {
1380     *limit = mid;
1381    
1382 ph10 808 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1383 nigel 87 use_offsets, use_size_offsets);
1384    
1385     if (count == errnumber)
1386     {
1387     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1388     min = mid;
1389     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1390     }
1391    
1392     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1393     count == PCRE_ERROR_PARTIAL)
1394     {
1395     if (mid == min + 1)
1396     {
1397     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1398     break;
1399     }
1400     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1401     max = mid;
1402     mid = (min + mid)/2;
1403     }
1404     else break; /* Some other error */
1405     }
1406    
1407     extra->flags &= ~flag;
1408     return count;
1409     }
1410    
1411    
1412    
1413     /*************************************************
1414 ph10 227 * Case-independent strncmp() function *
1415     *************************************************/
1416    
1417     /*
1418     Arguments:
1419     s first string
1420     t second string
1421     n number of characters to compare
1422    
1423     Returns: < 0, = 0, or > 0, according to the comparison
1424     */
1425    
1426     static int
1427 ph10 756 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1428 ph10 227 {
1429     while (n--)
1430     {
1431     int c = tolower(*s++) - tolower(*t++);
1432     if (c) return c;
1433     }
1434     return 0;
1435     }
1436    
1437    
1438    
1439     /*************************************************
1440 nigel 91 * Check newline indicator *
1441     *************************************************/
1442    
1443 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1444     a message and return 0 if there is no match.
1445 nigel 91
1446     Arguments:
1447     p points after the leading '<'
1448     f file for error message
1449    
1450     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1451     */
1452    
1453     static int
1454 ph10 756 check_newline(pcre_uint8 *p, FILE *f)
1455 nigel 91 {
1456 ph10 756 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1457     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1458     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1459     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1460     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1461     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1462     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1463 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1464     return 0;
1465     }
1466    
1467    
1468    
/*************************************************
*                Usage function                  *
*************************************************/

/* Write the command-line summary to stdout. Lines for optional features
appear only when the corresponding feature is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s force each pattern to be studied at basic level\n", stdout);
fputs(" -s+ force each pattern to be studied, using JIT if available\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1510    
1511    
1512    
1513     /*************************************************
1514 nigel 63 * Main Program *
1515     *************************************************/
1516 nigel 43
1517 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1518     consist of a regular expression, in delimiters and optionally followed by
1519     options, followed by a set of test data, terminated by an empty line. */
1520    
1521     int main(int argc, char **argv)
1522     {
1523     FILE *infile = stdin;
1524     int options = 0;
1525     int study_options = 0;
1526 ph10 386 int default_find_match_limit = FALSE;
1527 nigel 3 int op = 1;
1528     int timeit = 0;
1529 nigel 93 int timeitm = 0;
1530 nigel 3 int showinfo = 0;
1531 nigel 31 int showstore = 0;
1532 ph10 667 int force_study = -1;
1533     int force_study_options = 0;
1534 nigel 87 int quiet = 0;
1535 nigel 53 int size_offsets = 45;
1536     int size_offsets_max;
1537 nigel 77 int *offsets = NULL;
1538 nigel 53 #if !defined NOPOSIX
1539 nigel 3 int posix = 0;
1540 nigel 53 #endif
1541 nigel 3 int debug = 0;
1542 nigel 11 int done = 0;
1543 nigel 77 int all_use_dfa = 0;
1544     int yield = 0;
1545 nigel 91 int stack_size;
1546 nigel 3
1547 ph10 667 pcre_jit_stack *jit_stack = NULL;
1548    
1549 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1550     that 1024 is plenty long enough for the few names we'll be testing. */
1551 nigel 69
1552 ph10 756 pcre_uchar copynames[1024];
1553     pcre_uchar getnames[1024];
1554 nigel 91
1555 ph10 756 pcre_uchar *copynamesptr;
1556     pcre_uchar *getnamesptr;
1557 nigel 91
1558 ph10 805 /* Get buffers from malloc() so that valgrind will check their misuse when
1559 ph10 808 debugging. They grow automatically when very long lines are read. The 16-bit
1560 ph10 805 buffer (buffer16) is obtained only if needed. */
1561 nigel 69
1562 ph10 756 buffer = (pcre_uint8 *)malloc(buffer_size);
1563     dbuffer = (pcre_uint8 *)malloc(buffer_size);
1564     pbuffer = (pcre_uint8 *)malloc(buffer_size);
1565 nigel 69
1566 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1567 nigel 3
1568 nigel 93 outfile = stdout;
1569    
1570     /* The following _setmode() stuff is some Windows magic that tells its runtime
1571     library to translate CRLF into a single LF character. At least, that's what
1572     I've been told: never having used Windows I take this all on trust. Originally
1573     it set 0x8000, but then I was advised that _O_BINARY was better. */
1574    
1575 nigel 75 #if defined(_WIN32) || defined(WIN32)
1576 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1577     #endif
1578 nigel 75
1579 nigel 3 /* Scan options */
1580    
1581     while (argc > 1 && argv[op][0] == '-')
1582     {
1583 ph10 808 pcre_uint8 *endptr;
1584 nigel 53
1585 ph10 808 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1586 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1587 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
1588 ph10 667 {
1589     force_study = 1;
1590     force_study_options = PCRE_STUDY_JIT_COMPILE;
1591 ph10 691 }
1592 ph10 808 #ifdef SUPPORT_PCRE16
1593     else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1594     #endif
1595    
1596 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1597 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1598 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1599     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1600 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1601 nigel 79 #if !defined NODFA
1602 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1603 nigel 79 #endif
1604 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1605 ph10 808 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1606 nigel 65 *endptr == 0))
1607 nigel 53 {
1608     op++;
1609     argc--;
1610     }
1611 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1612     {
1613     int both = argv[op][2] == 0;
1614     int temp;
1615 ph10 808 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1616 nigel 93 *endptr == 0))
1617     {
1618     timeitm = temp;
1619     op++;
1620     argc--;
1621     }
1622     else timeitm = LOOPREPEAT;
1623     if (both) timeit = timeitm;
1624     }
1625 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1626 ph10 808 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1627 nigel 91 *endptr == 0))
1628     {
1629 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1630 nigel 91 printf("PCRE: -S not supported on this OS\n");
1631     exit(1);
1632     #else
1633     int rc;
1634     struct rlimit rlim;
1635     getrlimit(RLIMIT_STACK, &rlim);
1636     rlim.rlim_cur = stack_size * 1024 * 1024;
1637     rc = setrlimit(RLIMIT_STACK, &rlim);
1638     if (rc != 0)
1639     {
1640     printf("PCRE: setrlimit() failed with error %d\n", rc);
1641     exit(1);
1642     }
1643     op++;
1644     argc--;
1645     #endif
1646     }
1647 nigel 53 #if !defined NOPOSIX
1648 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1649 nigel 53 #endif
1650 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1651     {
1652     int rc;
1653 ph10 392 unsigned long int lrc;
1654 nigel 63 printf("PCRE version %s\n", pcre_version());
1655     printf("Compiled with\n");
1656 ph10 805
1657 ph10 808 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1658     are set, either both UTFs are supported or both are not supported. */
1659    
1660 ph10 805 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1661     printf(" 8-bit and 16-bit support\n");
1662 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1663 ph10 808 if (rc)
1664     printf(" UTF-8 and UTF-16 support\n");
1665     else
1666     printf(" No UTF-8 or UTF-16 support\n");
1667 ph10 805 #elif defined SUPPORT_PCRE8
1668     printf(" 8-bit support only\n");
1669     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1670     printf(" %sUTF-8 support\n", rc? "" : "No ");
1671 ph10 808 #else
1672 ph10 805 printf(" 16-bit support only\n");
1673     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1674     printf(" %sUTF-16 support\n", rc? "" : "No ");
1675 ph10 808 #endif
1676    
1677 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1678     printf(" %sUnicode properties support\n", rc? "" : "No ");
1679 ph10 667 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1680 ph10 674 if (rc)
1681 ph10 689 printf(" Just-in-time compiler support\n");
1682 ph10 674 else
1683     printf(" No just-in-time compiler support\n");
1684 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1685 ph10 391 /* Note that these values are always the ASCII values, even
1686 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1687 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1688     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1689 ph10 150 (rc == -2)? "ANYCRLF" :
1690 nigel 93 (rc == -1)? "ANY" : "???");
1691 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1692     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1693     "all Unicode newlines");
1694 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1695     printf(" Internal link size = %d\n", rc);
1696     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1697     printf(" POSIX malloc threshold = %d\n", rc);
1698 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1699     printf(" Default match limit = %ld\n", lrc);
1700     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1701     printf(" Default recursion depth limit = %ld\n", lrc);
1702 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1703     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1704 ph10 121 goto EXIT;
1705 nigel 63 }
1706 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1707     strcmp(argv[op], "--help") == 0)
1708     {
1709     usage();
1710     goto EXIT;
1711     }
1712 nigel 3 else
1713     {
1714 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1715 nigel 93 usage();
1716 nigel 77 yield = 1;
1717     goto EXIT;
1718 nigel 3 }
1719     op++;
1720     argc--;
1721     }
1722    
1723 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1724    
1725     size_offsets_max = size_offsets;
1726 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1727 nigel 53 if (offsets == NULL)
1728     {
1729     printf("** Failed to get %d bytes of memory for offsets vector\n",
1730 ph10 151 (int)(size_offsets_max * sizeof(int)));
1731 nigel 77 yield = 1;
1732     goto EXIT;
1733 nigel 53 }
1734    
1735 nigel 3 /* Sort out the input and output files */
1736    
1737     if (argc > 1)
1738     {
1739 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1740 nigel 3 if (infile == NULL)
1741     {
1742     printf("** Failed to open %s\n", argv[op]);
1743 nigel 77 yield = 1;
1744     goto EXIT;
1745 nigel 3 }
1746     }
1747    
1748     if (argc > 2)
1749     {
1750 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1751 nigel 3 if (outfile == NULL)
1752     {
1753     printf("** Failed to open %s\n", argv[op+1]);
1754 nigel 77 yield = 1;
1755     goto EXIT;
1756 nigel 3 }
1757     }
1758    
1759     /* Set alternative malloc function */
1760    
1761 ph10 805 #ifdef SUPPORT_PCRE8
1762 nigel 3 pcre_malloc = new_malloc;
1763 nigel 73 pcre_free = new_free;
1764     pcre_stack_malloc = stack_malloc;
1765     pcre_stack_free = stack_free;
1766 ph10 805 #endif
1767 nigel 3
1768 ph10 805 #ifdef SUPPORT_PCRE16
1769     pcre16_malloc = new_malloc;
1770     pcre16_free = new_free;
1771     pcre16_stack_malloc = stack_malloc;
1772     pcre16_stack_free = stack_free;
1773     #endif
1774    
1775 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1776 nigel 3
1777 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1778 nigel 3
1779     /* Main loop */
1780    
1781 nigel 11 while (!done)
1782 nigel 3 {
1783     pcre *re = NULL;
1784     pcre_extra *extra = NULL;
1785 nigel 37
1786     #if !defined NOPOSIX /* There are still compilers that require no indent */
1787 nigel 3 regex_t preg;
1788 nigel 45 int do_posix = 0;
1789 nigel 37 #endif
1790    
1791 nigel 7 const char *error;
1792 ph10 808 pcre_uint8 *markptr;
1793     pcre_uint8 *p, *pp, *ppp;
1794     pcre_uint8 *to_file = NULL;
1795     const pcre_uint8 *tables = NULL;
1796 nigel 75 unsigned long int true_size, true_study_size = 0;
1797     size_t size, regex_gotten_store;
1798 ph10 654 int do_allcaps = 0;
1799 ph10 512 int do_mark = 0;
1800 nigel 3 int do_study = 0;
1801 ph10 654 int no_force_study = 0;
1802 nigel 25 int do_debug = debug;
1803 nigel 35 int do_G = 0;
1804     int do_g = 0;
1805 nigel 25 int do_showinfo = showinfo;
1806 nigel 35 int do_showrest = 0;
1807 ph10 616 int do_showcaprest = 0;
1808 nigel 75 int do_flip = 0;
1809 nigel 93 int erroroffset, len, delimiter, poffset;
1810 nigel 3
1811 ph10 810 use_utf = 0;
1812 ph10 211 debug_lengths = 1;
1813 nigel 63
1814 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1815 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1816 nigel 63 fflush(outfile);
1817 nigel 3
1818     p = buffer;
1819     while (isspace(*p)) p++;
1820     if (*p == 0) continue;
1821    
1822 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1823 nigel 3
1824 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1825     {
1826 nigel 91 unsigned long int magic, get_options;
1827 ph10 756 pcre_uint8 sbuf[8];
1828 nigel 75 FILE *f;
1829    
1830     p++;
1831     pp = p + (int)strlen((char *)p);
1832     while (isspace(pp[-1])) pp--;
1833     *pp = 0;
1834    
1835     f = fopen((char *)p, "rb");
1836     if (f == NULL)
1837     {
1838     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1839     continue;
1840     }
1841    
1842     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1843    
1844     true_size =
1845     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1846     true_study_size =
1847     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1848    
1849     re = (real_pcre *)new_malloc(true_size);
1850 ph10 801 regex_gotten_store = first_gotten_store;
1851 nigel 75
1852     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1853    
1854     magic = ((real_pcre *)re)->magic_number;
1855     if (magic != MAGIC_NUMBER)
1856     {
1857     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1858     {
1859     do_flip = 1;
1860     }
1861     else
1862     {
1863     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1864     fclose(f);
1865     continue;
1866     }
1867     }
1868    
1869 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1870 nigel 75 do_flip? " (byte-inverted)" : "", p);
1871    
1872 ph10 612 /* Now see if there is any following study data. */
1873 nigel 75
1874     if (true_study_size != 0)
1875     {
1876     pcre_study_data *psd;
1877    
1878     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1879     extra->flags = PCRE_EXTRA_STUDY_DATA;
1880    
1881     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1882     extra->study_data = psd;
1883    
1884     if (fread(psd, 1, true_study_size, f) != true_study_size)
1885     {
1886     FAIL_READ:
1887     fprintf(outfile, "Failed to read data from %s\n", p);
1888 zherczeg 809 if (extra != NULL)
1889     {
1890     PCRE_FREE_STUDY(extra);
1891     }
1892 nigel 75 if (re != NULL) new_free(re);
1893     fclose(f);
1894     continue;
1895     }
1896     fprintf(outfile, "Study data loaded from %s\n", p);
1897     do_study = 1; /* To get the data output if requested */
1898     }
1899     else fprintf(outfile, "No study data\n");
1900    
1901 zherczeg 811 /* Flip the necessary bytes. */
1902     if (do_flip != 0)
1903     {
1904     PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
1905     }
1906    
1907     /* Need to know if UTF-8 for printing data strings */
1908    
1909     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1910     use_utf = (get_options & PCRE_UTF8) != 0;
1911    
1912 nigel 75 fclose(f);
1913     goto SHOW_INFO;
1914     }
1915    
1916     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1917 zherczeg 811 the pattern; if it isn't complete, read more. */
1918 nigel 75
1919 nigel 3 delimiter = *p++;
1920    
1921 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1922 nigel 3 {
1923 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1924 nigel 3 goto SKIP_DATA;
1925     }
1926    
1927     pp = p;
1928 ph10 530 poffset = (int)(p - buffer);
1929 nigel 3
1930     for(;;)
1931     {
1932 nigel 29 while (*pp != 0)
1933     {
1934     if (*pp == '\\' && pp[1] != 0) pp++;
1935     else if (*pp == delimiter) break;
1936     pp++;
1937     }
1938 nigel 3 if (*pp != 0) break;
1939 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1940 nigel 3 {
1941     fprintf(outfile, "** Unexpected EOF\n");
1942 nigel 11 done = 1;
1943     goto CONTINUE;
1944 nigel 3 }
1945 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1946 nigel 3 }
1947    
1948 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1949     pointer to the correct relative point in the buffer. */
1950    
1951     p = buffer + poffset;
1952    
1953 nigel 29 /* If the first character after the delimiter is backslash, make
1954     the pattern end with backslash. This is purely to provide a way
1955     of testing for the error message when a pattern ends with backslash. */
1956    
1957     if (pp[1] == '\\') *pp++ = '\\';
1958    
1959 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1960     for callouts. */
1961 nigel 3
1962     *pp++ = 0;
1963 nigel 75 strcpy((char *)pbuffer, (char *)p);
1964 nigel 3
1965     /* Look for options after final delimiter */
1966    
1967     options = 0;
1968 ph10 801 study_options = 0;
1969 nigel 31 log_store = showstore; /* default from command line */
1970    
1971 nigel 3 while (*pp != 0)
1972     {
1973     switch (*pp++)
1974     {
1975 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1976 nigel 35 case 'g': do_g = 1; break;
1977 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1978     case 'm': options |= PCRE_MULTILINE; break;
1979     case 's': options |= PCRE_DOTALL; break;
1980     case 'x': options |= PCRE_EXTENDED; break;
1981 nigel 25
1982 ph10 616 case '+':
1983 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1984 ph10 616 break;
1985 ph10 654
1986     case '=': do_allcaps = 1; break;
1987 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1988 nigel 93 case 'B': do_debug = 1; break;
1989 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1990 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1991 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1992 nigel 75 case 'F': do_flip = 1; break;
1993 nigel 35 case 'G': do_G = 1; break;
1994 nigel 25 case 'I': do_showinfo = 1; break;
1995 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1996 ph10 512 case 'K': do_mark = 1; break;
1997 nigel 31 case 'M': log_store = 1; break;
1998 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1999 nigel 37
2000     #if !defined NOPOSIX
2001 nigel 3 case 'P': do_posix = 1; break;
2002 nigel 37 #endif
2003    
2004 ph10 654 case 'S':
2005 ph10 691 if (do_study == 0)
2006 ph10 612 {
2007 ph10 691 do_study = 1;
2008 ph10 667 if (*pp == '+')
2009     {
2010     study_options |= PCRE_STUDY_JIT_COMPILE;
2011 ph10 691 pp++;
2012     }
2013     }
2014 ph10 667 else
2015     {
2016 ph10 612 do_study = 0;
2017     no_force_study = 1;
2018 ph10 654 }
2019 ph10 612 break;
2020    
2021 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
2022 ph10 535 case 'W': options |= PCRE_UCP; break;
2023 nigel 3 case 'X': options |= PCRE_EXTRA; break;
2024 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2025 ph10 126 case 'Z': debug_lengths = 0; break;
2026 ph10 810 case '8': options |= PCRE_UTF8; use_utf = 1; break;
2027 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2028 ph10 545
2029 ph10 541 case 'T':
2030     switch (*pp++)
2031     {
2032     case '0': tables = tables0; break;
2033     case '1': tables = tables1; break;
2034 ph10 545
2035 ph10 541 case '\r':
2036     case '\n':
2037 ph10 545 case ' ':
2038     case 0:
2039 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
2040 ph10 545 goto SKIP_DATA;
2041    
2042     default:
2043 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2044 ph10 545 goto SKIP_DATA;
2045 ph10 541 }
2046 ph10 545 break;
2047 nigel 25
2048     case 'L':
2049     ppp = pp;
2050 nigel 93 /* The '\r' test here is so that it works on Windows. */
2051     /* The 0 (NUL) test is just in case this is an unterminated line. */
2052     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2053 nigel 25 *ppp = 0;
2054     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2055     {
2056     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2057     goto SKIP_DATA;
2058     }
2059 nigel 93 locale_set = 1;
2060 nigel 25 tables = pcre_maketables();
2061     pp = ppp;
2062     break;
2063    
2064 nigel 75 case '>':
2065     to_file = pp;
2066     while (*pp != 0) pp++;
2067     while (isspace(pp[-1])) pp--;
2068     *pp = 0;
2069     break;
2070    
2071 nigel 91 case '<':
2072     {
2073 ph10 756 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2074 ph10 336 {
2075     options |= PCRE_JAVASCRIPT_COMPAT;
2076 ph10 345 pp += 3;
2077 ph10 336 }
2078     else
2079 ph10 345 {
2080 ph10 336 int x = check_newline(pp, outfile);
2081     if (x == 0) goto SKIP_DATA;
2082     options |= x;
2083     while (*pp++ != '>');
2084 ph10 345 }
2085 nigel 91 }
2086     break;
2087    
2088 nigel 77 case '\r': /* So that it works in Windows */
2089     case '\n':
2090     case ' ':
2091     break;
2092 nigel 75
2093 nigel 3 default:
2094     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2095     goto SKIP_DATA;
2096     }
2097     }
2098    
2099 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
2100 nigel 25 timing, showing, or debugging options, nor the ability to pass over
2101 ph10 805 local character tables. Neither does it have 16-bit support. */
2102 nigel 3
2103 nigel 37 #if !defined NOPOSIX
2104 nigel 3 if (posix || do_posix)
2105     {
2106     int rc;
2107     int cflags = 0;
2108 nigel 75
2109 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2110     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2111 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2112 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2113     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2114 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2115 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2116 nigel 87
2117 ph10 801 first_gotten_store = 0;
2118 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
2119    
2120     /* Compilation failed; go back for another re, skipping to blank line
2121     if non-interactive. */
2122    
2123     if (rc != 0)
2124     {
2125 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2126 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2127     goto SKIP_DATA;
2128     }
2129     }
2130    
2131     /* Handle compiling via the native interface */
2132    
2133     else
2134 nigel 37 #endif /* !defined NOPOSIX */
2135    
2136 nigel 3 {
2137 ph10 412 unsigned long int get_options;
2138 ph10 808
2139     /* In 16-bit mode, convert the input. */
2140    
2141 ph10 805 #ifdef SUPPORT_PCRE16
2142 ph10 808 if (use_pcre16)
2143     {
2144 ph10 810 if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2145     {
2146     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2147     "converted to UTF-16\n");
2148     goto SKIP_DATA;
2149     }
2150 ph10 808 p = (pcre_uint8 *)buffer16;
2151     }
2152 ph10 805 #endif
2153 ph10 416
2154 ph10 805 /* Compile many times when timing */
2155    
2156 nigel 93 if (timeit > 0)
2157 nigel 3 {
2158     register int i;
2159     clock_t time_taken;
2160     clock_t start_time = clock();
2161 nigel 93 for (i = 0; i < timeit; i++)
2162 nigel 3 {
2163 ph10 808 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2164 nigel 3 if (re != NULL) free(re);
2165     }
2166     time_taken = clock() - start_time;
2167 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
2168     (((double)time_taken * 1000.0) / (double)timeit) /
2169 nigel 63 (double)CLOCKS_PER_SEC);
2170 nigel 3 }
2171    
2172 ph10 801 first_gotten_store = 0;
2173 ph10 808 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2174 nigel 3
2175     /* Compilation failed; go back for another re, skipping to blank line
2176     if non-interactive. */
2177    
2178     if (re == NULL)
2179     {
2180     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2181     SKIP_DATA:
2182     if (infile != stdin)
2183     {
2184     for (;;)
2185     {
2186 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
2187 nigel 11 {
2188     done = 1;
2189     goto CONTINUE;
2190     }
2191 nigel 3 len = (int)strlen((char *)buffer);
2192     while (len > 0 && isspace(buffer[len-1])) len--;
2193     if (len == 0) break;
2194     }
2195     fprintf(outfile, "\n");
2196     }
2197 nigel 25 goto CONTINUE;
2198 nigel 3 }
2199 ph10 416
2200     /* Compilation succeeded. It is now possible to set the UTF-8 option from
2201     within the regex; check for this so that we know how to process the data
2202 ph10 412 lines. */
2203 ph10 416
2204 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2205 ph10 810 if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2206 nigel 3
2207 ph10 801 /* Extract the size for possible writing before possibly flipping it,
2208     and remember the store that was got. */
2209 nigel 3
2210 ph10 801 true_size = ((real_pcre *)re)->size;
2211     regex_gotten_store = first_gotten_store;
2212    
2213     /* Output code size information if requested */
2214    
2215 nigel 63 if (log_store)
2216     fprintf(outfile, "Memory allocation (code space): %d\n",
2217 ph10 801 (int)(first_gotten_store -
2218 nigel 63 sizeof(real_pcre) -
2219     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2220    
2221 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
2222 ph10 654 help with the matching, unless the pattern has the SS option, which
2223 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
2224     never sensible). */
2225 nigel 75
2226 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
2227 nigel 75 {
2228 nigel 93 if (timeit > 0)
2229 nigel 75 {
2230     register int i;
2231     clock_t time_taken;
2232     clock_t start_time = clock();
2233 nigel 93 for (i = 0; i < timeit; i++)
2234 ph10 805 {
2235 ph10 808 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2236     }
2237 nigel 75 time_taken = clock() - start_time;
2238 zherczeg 809 if (extra != NULL)
2239     {
2240     PCRE_FREE_STUDY(extra);
2241     }
2242 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
2243     (((double)time_taken * 1000.0) / (double)timeit) /
2244 nigel 75 (double)CLOCKS_PER_SEC);
2245     }
2246 ph10 808 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2247 nigel 75 if (error != NULL)
2248     fprintf(outfile, "Failed to study: %s\n", error);
2249     else if (extra != NULL)
2250 ph10 801 {
2251 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2252 ph10 801 if (log_store)
2253     {
2254     size_t jitsize;
2255     new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2256     if (jitsize != 0)
2257     fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2258     }
2259     }
2260 nigel 75 }
2261 ph10 512
2262 ph10 510 /* If /K was present, we set up for handling MARK data. */
2263 ph10 512
2264 ph10 510 if (do_mark)
2265     {
2266     if (extra == NULL)
2267     {
2268     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2269     extra->flags = 0;
2270     }
2271 ph10 512 extra->mark = &markptr;
2272 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
2273 ph10 512 }
2274 nigel 75
2275 ph10 805 /* Extract and display information from the compiled data if required. */
2276 nigel 75
2277     SHOW_INFO:
2278    
2279 nigel 93 if (do_debug)
2280     {
2281     fprintf(outfile, "------------------------------------------------------------------\n");
2282 zherczeg 809 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2283 ph10 805 if (use_pcre16)
2284     pcre16_printint(re, outfile, debug_lengths);
2285 ph10 808 else
2286 ph10 805 pcre_printint(re, outfile, debug_lengths);
2287 zherczeg 809 #elif defined SUPPORT_PCRE8
2288     pcre_printint(re, outfile, debug_lengths);
2289     #else
2290     pcre16_printint(re, outfile, debug_lengths);
2291     #endif
2292 nigel 93 }
2293 ph10 416
2294 ph10 412 /* We already have the options in get_options (see above) */
2295 nigel 93
2296 nigel 25 if (do_showinfo)
2297 nigel 3 {
2298 ph10 412 unsigned long int all_options;
2299 nigel 79 #if !defined NOINFOCHECK
2300 nigel 43 int old_first_char, old_options, old_count;
2301 nigel 79 #endif
2302 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2303 ph10 227 hascrorlf;
2304 nigel 63 int nameentrysize, namecount;
2305 ph10 756 const pcre_uchar *nametable;
2306 nigel 3
2307 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2308     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2309     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2310 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2311 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2312 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2313     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2314 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2315 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2316     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2317 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2318 nigel 43
2319 ph10 805 /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2320     that it gives the same results as the new function. */
2321 ph10 808
2322 nigel 79 #if !defined NOINFOCHECK
2323 ph10 805 if (!use_pcre16)
2324 ph10 808 {
2325 ph10 805 old_count = pcre_info(re, &old_options, &old_first_char);
2326     if (count < 0) fprintf(outfile,
2327     "Error %d from pcre_info()\n", count);
2328     else
2329     {
2330     if (old_count != count) fprintf(outfile,
2331     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2332     old_count);
2333 ph10 808
2334 ph10 805 if (old_first_char != first_char) fprintf(outfile,
2335     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2336     first_char, old_first_char);
2337 ph10 808
2338 ph10 805 if (old_options != (int)get_options) fprintf(outfile,
2339     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2340     get_options, old_options);
2341     }
2342 ph10 808 }
2343 nigel 79 #endif
2344 nigel 43
2345 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
2346 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2347 nigel 77 (int)size, (int)regex_gotten_store);
2348 nigel 43
2349     fprintf(outfile, "Capturing subpattern count = %d\n", count);
2350     if (backrefmax > 0)
2351     fprintf(outfile, "Max back reference = %d\n", backrefmax);
2352 nigel 63
2353     if (namecount > 0)
2354     {
2355     fprintf(outfile, "Named capturing subpatterns:\n");
2356     while (namecount-- > 0)
2357     {
2358     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2359     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2360     GET2(nametable, 0));
2361     nametable += nameentrysize;
2362     }
2363     }
2364 ph10 172
2365 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2366 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2367 nigel 63
2368 nigel 75 all_options = ((real_pcre *)re)->options;
2369 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2370 nigel 75
2371 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
2372 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2373 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2374     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2375     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2376     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2377 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2378 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2379 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2380     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2381 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2382     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2383     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2384 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2385 ph10 810 ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2386 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2387 ph10 810 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2388 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2389 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2390 ph10 172
2391 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2392 nigel 43
2393 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2394 nigel 91 {
2395     case PCRE_NEWLINE_CR:
2396     fprintf(outfile, "Forced newline sequence: CR\n");
2397     break;
2398 nigel 43
2399 nigel 91 case PCRE_NEWLINE_LF:
2400     fprintf(outfile, "Forced newline sequence: LF\n");
2401     break;
2402    
2403     case PCRE_NEWLINE_CRLF:
2404     fprintf(outfile, "Forced newline sequence: CRLF\n");
2405     break;
2406    
2407 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2408     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2409     break;
2410    
2411 nigel 93 case PCRE_NEWLINE_ANY:
2412     fprintf(outfile, "Forced newline sequence: ANY\n");
2413     break;
2414    
2415 nigel 91 default:
2416     break;
2417     }
2418    
2419 nigel 43 if (first_char == -1)
2420     {
2421 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2422 nigel 43 }
2423     else if (first_char < 0)
2424     {
2425     fprintf(outfile, "No first char\n");
2426     }
2427     else
2428     {
2429 zherczeg 774 const char *caseless =
2430     ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2431 nigel 63 "" : " (caseless)";
2432 ph10 810
2433 ph10 808 if (PRINTOK(first_char))
2434 zherczeg 774 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2435 nigel 3 else
2436 ph10 810 {
2437     fprintf(outfile, "First char = ");
2438     pchar(first_char, outfile);
2439     fprintf(outfile, "%s\n", caseless);
2440     }
2441 nigel 43 }
2442 nigel 37
2443 nigel 43 if (need_char < 0)
2444     {
2445     fprintf(outfile, "No need char\n");
2446 nigel 3 }
2447 nigel 43 else
2448     {
2449 zherczeg 774 const char *caseless =
2450     ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2451 nigel 63 "" : " (caseless)";
2452 zherczeg 774
2453 ph10 808 if (PRINTOK(need_char))
2454 zherczeg 774 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2455 nigel 43 else
2456 zherczeg 774 fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2457 nigel 43 }
2458 nigel 75
2459     /* Don't output study size; at present it is in any case a fixed
2460     value, but it varies, depending on the computer architecture, and
2461     so messes up the test suite. (And with the /F option, it might be
2462 ph10 654 flipped.) If study was forced by an external -s, don't show this
2463 ph10 612 information unless -i or -d was also present. This means that, except
2464     when auto-callouts are involved, the output from runs with and without
2465     -s should be identical. */
2466 nigel 75
2467 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2468 nigel 75 {
2469     if (extra == NULL)
2470     fprintf(outfile, "Study returned NULL\n");
2471     else
2472     {
2473 ph10 756 pcre_uint8 *start_bits = NULL;
2474 ph10 455 int minlength;
2475 ph10 461
2476 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2477 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2478    
2479 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2480     if (start_bits == NULL)
2481 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2482 nigel 75 else
2483     {
2484     int i;
2485     int c = 24;
2486     fprintf(outfile, "Starting byte set: ");
2487     for (i = 0; i < 256; i++)
2488     {
2489     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2490     {
2491     if (c > 75)
2492     {
2493     fprintf(outfile, "\n ");
2494     c = 2;
2495     }
2496 ph10 808 if (PRINTOK(i) && i != ' ')
2497 nigel 75 {
2498     fprintf(outfile, "%c ", i);
2499     c += 2;
2500     }
2501     else
2502     {
2503     fprintf(outfile, "\\x%02x ", i);
2504     c += 5;
2505     }
2506     }
2507     }
2508     fprintf(outfile, "\n");
2509     }
2510     }
2511 ph10 691
2512 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
2513 ph10 691
2514 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2515     {
2516 ph10 691 int jit;
2517 ph10 667 new_info(re, extra, PCRE_INFO_JIT, &jit);
2518 ph10 691 if (jit)
2519     fprintf(outfile, "JIT study was successful\n");
2520     else
2521     #ifdef SUPPORT_JIT
2522     fprintf(outfile, "JIT study was not successful\n");
2523 ph10 667 #else
2524 ph10 691 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2525 ph10 667 #endif
2526 ph10 691 }
2527 nigel 75 }
2528 nigel 3 }
2529    
2530 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2531     that is all. The first 8 bytes of the file are the regex length and then
2532     the study length, in big-endian order. */
2533 nigel 3
2534 nigel 75 if (to_file != NULL)
2535 nigel 3 {
2536 zherczeg 811 /* If the 'F' option was present, we flip the bytes of all the integer
2537     fields in the regex data block and the study block. This is to make it
2538     possible to test PCRE's handling of byte-flipped patterns, e.g. those
2539     compiled on a different architecture. */
2540    
2541     if (do_flip)
2542     {
2543     real_pcre *rre = (real_pcre *)re;
2544     rre->magic_number =
2545     byteflip(rre->magic_number, sizeof(rre->magic_number));
2546     rre->size = byteflip(rre->size, sizeof(rre->size));
2547     rre->options = byteflip(rre->options, sizeof(rre->options));
2548     rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2549     rre->top_bracket =
2550     (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2551     rre->top_backref =
2552     (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2553     rre->first_char =
2554     (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2555     rre->req_char =
2556     (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2557     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2558     sizeof(rre->name_table_offset));
2559     rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2560     sizeof(rre->name_entry_size));
2561     rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2562     sizeof(rre->name_count));
2563    
2564     if (extra != NULL)
2565     {
2566     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2567     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2568     rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2569     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2570     }
2571     }
2572    
2573 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2574     if (f == NULL)
2575 nigel 3 {
2576 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2577 nigel 3 }
2578 nigel 75 else
2579     {
2580 ph10 756 pcre_uint8 sbuf[8];
2581     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2582     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2583     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2584     sbuf[3] = (pcre_uint8)((true_size) & 255);
2585 ph10 259
2586 ph10 756 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2587     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2588     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2589     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2590 nigel 3
2591 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2592     fwrite(re, 1, true_size, f) < true_size)
2593     {
2594     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2595     }
2596 nigel 3 else
2597     {
2598 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2599 ph10 654
2600 ph10 658 /* If there is study data, write it. */
2601 ph10 654
2602 nigel 75 if (extra != NULL)
2603 nigel 3 {
2604 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2605     true_study_size)
2606 nigel 3 {
2607 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2608     strerror(errno));
2609 nigel 3 }
2610 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2611 nigel 3 }
2612     }
2613 nigel 75 fclose(f);
2614 nigel 3 }
2615 nigel 77
2616     new_free(re);
2617 zherczeg 809 if (extra != NULL)
2618     {
2619     PCRE_FREE_STUDY(extra);
2620     }
2621 ph10 545 if (locale_set)
2622 ph10 541 {
2623     new_free((void *)tables);
2624     setlocale(LC_CTYPE, "C");
2625 ph10 545 locale_set = 0;
2626     }
2627 nigel 75 continue; /* With next regex */
2628 nigel 3 }
2629 nigel 75 } /* End of non-POSIX compile */
2630 nigel 3
2631     /* Read data lines and test them */
2632    
2633     for (;;)
2634     {
2635 ph10 756 pcre_uint8 *q;
2636     pcre_uint8 *bptr;
2637 nigel 57 int *use_offsets = offsets;
2638 nigel 53 int use_size_offsets = size_offsets;
2639 nigel 63 int callout_data = 0;
2640     int callout_data_set = 0;
2641 nigel 3 int count, c;
2642 nigel 29 int copystrings = 0;
2643 ph10 386 int find_match_limit = default_find_match_limit;
2644 nigel 29 int getstrings = 0;
2645     int getlist = 0;
2646 nigel 39 int gmatched = 0;
2647 nigel 35 int start_offset = 0;
2648 ph10 579 int start_offset_sign = 1;
2649 nigel 41 int g_notempty = 0;
2650 nigel 77 int use_dfa = 0;
2651 nigel 3
2652     options = 0;
2653    
2654 nigel 91 *copynames = 0;
2655     *getnames = 0;
2656    
2657     copynamesptr = copynames;
2658     getnamesptr = getnames;
2659    
2660 nigel 63 pcre_callout = callout;
2661     first_callout = 1;
2662 ph10 654 last_callout_mark = NULL;
2663 nigel 63 callout_extra = 0;
2664     callout_count = 0;
2665     callout_fail_count = 999999;
2666     callout_fail_id = -1;
2667 nigel 73 show_malloc = 0;
2668 nigel 63
2669 nigel 91 if (extra != NULL) extra->flags &=
2670     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2671    
2672     len = 0;
2673     for (;;)
2674 nigel 11 {
2675 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2676 nigel 91 {
2677 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2678     {
2679 ph10 545 fprintf(outfile, "\n");
2680 ph10 537 break;
2681 ph10 545 }
2682 nigel 91 done = 1;
2683     goto CONTINUE;
2684     }
2685     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2686     len = (int)strlen((char *)buffer);
2687     if (buffer[len-1] == '\n') break;
2688 nigel 11 }
2689 nigel 3
2690     while (len > 0 && isspace(buffer[len-1])) len--;
2691     buffer[len] = 0;
2692     if (len == 0) break;
2693    
2694     p = buffer;
2695     while (isspace(*p)) p++;
2696    
2697 ph10 147 bptr = q = dbuffer;
2698 nigel 3 while ((c = *p++) != 0)
2699     {
2700     int i = 0;
2701     int n = 0;
2702 nigel 63
2703 nigel 3 if (c == '\\') switch ((c = *p++))
2704     {
2705     case 'a': c = 7; break;
2706     case 'b': c = '\b'; break;
2707     case 'e': c = 27; break;
2708     case 'f': c = '\f'; break;
2709     case 'n': c = '\n'; break;
2710     case 'r': c = '\r'; break;
2711     case 't': c = '\t'; break;
2712     case 'v': c = '\v'; break;
2713    
2714     case '0': case '1': case '2': case '3':
2715     case '4': case '5': case '6': case '7':
2716     c -= '0';
2717     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2718     c = c * 8 + *p++ - '0';
2719 nigel 91
2720     #if !defined NOUTF8
2721 ph10 810 if (use_utf && c > 255)
2722 nigel 91 {
2723 ph10 808 pcre_uint8 buff8[8];
2724 nigel 91 int ii, utn;
2725     utn = ord2utf8(c, buff8);
2726     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2727     c = buff8[ii]; /* Last byte */
2728     }
2729     #endif
2730 nigel 3 break;
2731    
2732     case 'x':
2733 nigel 49
2734     /* Handle \x{..} specially - new Perl thing for utf8 */
2735    
2736 nigel 79 #if !defined NOUTF8
2737 nigel 49 if (*p == '{')
2738     {
2739 ph10 808 pcre_uint8 *pt = p;
2740 nigel 49 c = 0;
2741 ph10 738
2742 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2743     when isxdigit() is a macro that refers to its argument more than
2744     once. This is banned by the C Standard, but apparently happens in at
2745     least one MacOS environment. */
2746 ph10 738
2747 ph10 735 for (pt++; isxdigit(*pt); pt++)
2748 ph10 734 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2749 nigel 49 if (*pt == '}')
2750     {
2751 ph10 808 pcre_uint8 buff8[8];
2752 nigel 49 int ii, utn;
2753 ph10 810 if (use_utf)
2754 ph10 358 {
2755 ph10 355 utn = ord2utf8(c, buff8);
2756     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2757     c = buff8[ii]; /* Last byte */
2758     }
2759     else
2760     {
2761 ph10 358 if (c > 255)
2762 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2763     "UTF-8 mode is not enabled.\n"
2764     "** Truncation will probably give the wrong result.\n", c);
2765 ph10 358 }
2766 nigel 49 p = pt + 1;
2767     break;
2768     }
2769     /* Not correct form; fall through */
2770     }
2771 nigel 79 #endif
2772 nigel 49
2773     /* Ordinary \x */
2774    
2775 nigel 3 c = 0;
2776     while (i++ < 2 && isxdigit(*p))
2777     {
2778 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2779 nigel 3 p++;
2780     }
2781     break;
2782    
2783 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2784 nigel 3 p--;
2785     continue;
2786    
2787 nigel 75 case '>':
2788 ph10 579 if (*p == '-')
2789 ph10 567 {
2790     start_offset_sign = -1;
2791     p++;
2792 ph10 579 }
2793 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2794 ph10 579 start_offset *= start_offset_sign;
2795 nigel 75 continue;
2796    
2797 nigel 3 case 'A': /* Option setting */
2798     options |= PCRE_ANCHORED;
2799     continue;
2800    
2801     case 'B':
2802     options |= PCRE_NOTBOL;
2803     continue;
2804    
2805 nigel 29 case 'C':
2806 nigel 63 if (isdigit(*p)) /* Set copy string */
2807     {
2808     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2809     copystrings |= 1 << n;
2810     }
2811     else if (isalnum(*p))
2812     {
2813 ph10 756 pcre_uchar *npp = copynamesptr;
2814 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2815 nigel 91 *npp++ = 0;
2816 nigel 67 *npp = 0;
2817 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2818 nigel 63 if (n < 0)
2819 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2820     copynamesptr = npp;
2821 nigel 63 }
2822     else if (*p == '+')
2823     {
2824     callout_extra = 1;
2825     p++;
2826     }
2827     else if (*p == '-')
2828     {
2829     pcre_callout = NULL;
2830     p++;
2831     }
2832     else if (*p == '!')
2833     {
2834     callout_fail_id = 0;
2835     p++;
2836     while(isdigit(*p))
2837     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2838     callout_fail_count = 0;
2839     if (*p == '!')
2840     {
2841     p++;
2842     while(isdigit(*p))
2843     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2844     }
2845     }
2846     else if (*p == '*')
2847     {
2848     int sign = 1;
2849     callout_data = 0;
2850     if (*(++p) == '-') { sign = -1; p++; }
2851     while(isdigit(*p))
2852     callout_data = callout_data * 10 + *p++ - '0';
2853     callout_data *= sign;
2854     callout_data_set = 1;
2855     }
2856 nigel 29 continue;
2857    
2858 nigel 79 #if !defined NODFA
2859 nigel 77 case 'D':
2860 nigel 79 #if !defined NOPOSIX
2861 nigel 77 if (posix || do_posix)
2862     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2863     else
2864 nigel 79 #endif
2865 nigel 77 use_dfa = 1;
2866     continue;
2867 ph10 553 #endif
2868 nigel 77
2869 ph10 553 #if !defined NODFA
2870 nigel 77 case 'F':
2871     options |= PCRE_DFA_SHORTEST;
2872     continue;
2873 nigel 79 #endif
2874 nigel 77
2875 nigel 29 case 'G':
2876 nigel 63 if (isdigit(*p))
2877     {
2878     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2879     getstrings |= 1 << n;
2880     }
2881     else if (isalnum(*p))
2882     {
2883 ph10 756 pcre_uchar *npp = getnamesptr;
2884 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2885 nigel 91 *npp++ = 0;
2886 nigel 67 *npp = 0;
2887 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2888 nigel 63 if (n < 0)
2889 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2890     getnamesptr = npp;
2891 nigel 63 }
2892 nigel 29 continue;
2893 ph10 691
2894 ph10 667 case 'J':
2895     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2896 ph10 691 if (extra != NULL
2897     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2898 ph10 667 && extra->executable_jit != NULL)
2899 ph10 691 {
2900 ph10 667 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2901     jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2902 ph10 675 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2903 ph10 691 }
2904 ph10 667 continue;
2905 nigel 29
2906     case 'L':
2907     getlist = 1;
2908     continue;
2909    
2910 nigel 63 case 'M':
2911     find_match_limit = 1;
2912     continue;
2913    
2914 nigel 37 case 'N':
2915 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2916     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2917 ph10 461 else
2918 ph10 442 options |= PCRE_NOTEMPTY;
2919 nigel 37 continue;
2920    
2921 nigel 3 case 'O':
2922     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2923 nigel 53 if (n > size_offsets_max)
2924     {
2925     size_offsets_max = n;
2926 nigel 57 free(offsets);
2927 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2928 nigel 53 if (offsets == NULL)
2929     {
2930     printf("** Failed to get %d bytes of memory for offsets vector\n",
2931 ph10 151 (int)(size_offsets_max * sizeof(int)));
2932 nigel 77 yield = 1;
2933     goto EXIT;
2934 nigel 53 }
2935     }
2936     use_size_offsets = n;
2937 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2938 nigel 3 continue;
2939    
2940 nigel 75 case 'P':
2941 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2942 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2943 nigel 75 continue;
2944    
2945 nigel 91 case 'Q':
2946     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2947     if (extra == NULL)
2948     {
2949     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2950     extra->flags = 0;
2951     }
2952     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2953     extra->match_limit_recursion = n;
2954     continue;
2955    
2956     case 'q':
2957     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2958     if (extra == NULL)
2959     {
2960     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2961     extra->flags = 0;
2962     }
2963     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2964     extra->match_limit = n;
2965     continue;
2966    
2967 nigel 79 #if !defined NODFA
2968 nigel 77 case 'R':
2969     options |= PCRE_DFA_RESTART;
2970     continue;
2971 nigel 79 #endif
2972 nigel 77
2973 nigel 73 case 'S':
2974     show_malloc = 1;
2975     continue;
2976 ph10 392
2977 ph10 389 case 'Y':
2978     options |= PCRE_NO_START_OPTIMIZE;
2979 ph10 392 continue;
2980 nigel 73
2981 nigel 3 case 'Z':
2982     options |= PCRE_NOTEOL;
2983     continue;
2984 nigel 71
2985     case '?':
2986     options |= PCRE_NO_UTF8_CHECK;
2987     continue;
2988 nigel 91
2989     case '<':
2990     {
2991     int x = check_newline(p, outfile);
2992     if (x == 0) goto NEXT_DATA;
2993     options |= x;
2994     while (*p++ != '>');
2995     }
2996     continue;
2997 nigel 3 }
2998 nigel 9 *q++ = c;
2999 nigel 3 }
3000 nigel 9 *q = 0;
3001 ph10 530 len = (int)(q - dbuffer);
3002 ph10 545
3003 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
3004 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3005 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
3006 ph10 371
3007 ph10 363 #if !defined NOPOSIX
3008     if (posix || do_posix)
3009     {
3010     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
3011 ph10 371 bptr += buffer_size - len - 1;
3012 ph10 363 }
3013 ph10 371 else
3014     #endif
3015 ph10 363 {
3016     memmove(bptr + buffer_size - len, bptr, len);
3017 ph10 371 bptr += buffer_size - len;
3018     }
3019 nigel 3
3020 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
3021     {
3022     printf("**Match limit not relevant for DFA matching: ignored\n");
3023     find_match_limit = 0;
3024     }
3025    
3026 nigel 3 /* Handle matching via the POSIX interface, which does not
3027 nigel 63 support timing or playing with the match limit or callout data. */
3028 nigel 3
3029 nigel 37 #if !defined NOPOSIX
3030 nigel 3 if (posix || do_posix)
3031     {
3032     int rc;
3033     int eflags = 0;
3034 nigel 63 regmatch_t *pmatch = NULL;
3035     if (use_size_offsets > 0)
3036 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3037 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3038     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3039 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3040 nigel 3
3041 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3042 nigel 3
3043     if (rc != 0)
3044     {
3045 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3046 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3047     }
3048 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3049     != 0)
3050     {
3051     fprintf(outfile, "Matched with REG_NOSUB\n");
3052     }
3053 nigel 3 else
3054     {
3055 nigel 7 size_t i;
3056 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
3057 nigel 3 {
3058     if (pmatch[i].rm_so >= 0)
3059     {
3060 nigel 23 fprintf(outfile, "%2d: ", (int)i);
3061 ph10 808 PCHARSV(dbuffer + pmatch[i].rm_so,
3062 nigel 63 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3063 nigel 3 fprintf(outfile, "\n");
3064 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3065 nigel 35 {
3066 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
3067 ph10 808 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3068 nigel 63 outfile);
3069 nigel 35 fprintf(outfile, "\n");
3070     }
3071 nigel 3 }
3072     }
3073     }
3074 nigel 53 free(pmatch);
3075 ph10 808 goto NEXT_DATA;
3076 nigel 3 }
3077    
3078 ph10 808 #endif /* !defined NOPOSIX */
3079    
3080 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
3081 nigel 3
3082 ph10 808 #ifdef SUPPORT_PCRE16
3083     if (use_pcre16)
3084     {
3085     len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3086 ph10 810 if (len < 0)
3087     {
3088     fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3089     "converted to UTF-16\n");
3090     goto NEXT_DATA;
3091     }
3092 ph10 808 bptr = (pcre_uint8 *)buffer16;
3093     }
3094     #endif
3095 nigel 37
3096 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
3097 nigel 3 {
3098 ph10 512 markptr = NULL;
3099    
3100 nigel 93 if (timeitm > 0)
3101 nigel 3 {
3102     register int i;
3103     clock_t time_taken;
3104     clock_t start_time = clock();
3105 nigel 77
3106 nigel 79 #if !defined NODFA
3107 nigel 77 if (all_use_dfa || use_dfa)
3108     {
3109     int workspace[1000];
3110 nigel 93 for (i = 0; i < timeitm; i++)
3111 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3112 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
3113     sizeof(workspace)/sizeof(int));
3114     }
3115     else
3116 nigel 79 #endif
3117 nigel 77
3118 nigel 93 for (i = 0; i < timeitm; i++)
3119 ph10 808 {
3120     PCRE_EXEC(count, re, extra, bptr, len,
3121 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
3122 ph10 808 }
3123 nigel 3 time_taken = clock() - start_time;
3124 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
3125     (((double)time_taken * 1000.0) / (double)timeitm) /
3126 nigel 63 (double)CLOCKS_PER_SEC);
3127 nigel 3 }
3128    
3129 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
3130 nigel 87 varying limits in order to find the minimum value for the match limit and
3131 ph10 667 for the recursion limit. The match limits are relevant only to the normal
3132     running of pcre_exec(), so disable the JIT optimization. This makes it
3133     possible to run the same set of tests with and without JIT externally
3134     requested. */
3135 nigel 63
3136     if (find_match_limit)
3137     {
3138     if (extra == NULL)
3139     {
3140 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3141 nigel 63 extra->flags = 0;
3142     }
3143 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3144 ph10 691
3145 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
3146 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
3147     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3148     PCRE_ERROR_MATCHLIMIT, "match()");
3149 nigel 63
3150 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
3151     options|g_notempty, use_offsets, use_size_offsets,
3152     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3153     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3154 nigel 63 }
3155    
3156     /* If callout_data is set, use the interface with additional data */
3157    
3158     else if (callout_data_set)
3159     {
3160     if (extra == NULL)
3161     {
3162 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3163 nigel 63 extra->flags = 0;
3164     }
3165     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3166 nigel 71 extra->callout_data = &callout_data;
3167 ph10 808 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3168 nigel 63 options | g_notempty, use_offsets, use_size_offsets);
3169     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3170     }
3171    
3172     /* The normal case is just to do the match once, with the default
3173     value of match_limit. */
3174    
3175 nigel 79 #if !defined NODFA
3176 nigel 77 else if (all_use_dfa || use_dfa)
3177     {
3178     int workspace[1000];
3179 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3180 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
3181     sizeof(workspace)/sizeof(int));
3182     if (count == 0)
3183     {
3184     fprintf(outfile, "Matched, but too many subsidiary matches\n");
3185     count = use_size_offsets/2;
3186     }
3187     }
3188 nigel 79 #endif
3189 nigel 77
3190 nigel 75 else
3191     {
3192 ph10 808 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3193     options | g_notempty, use_offsets, use_size_offsets);
3194 nigel 77 if (count == 0)
3195     {
3196     fprintf(outfile, "Matched, but too many substrings\n");
3197     count = use_size_offsets/3;
3198     }
3199 nigel 75 }
3200 nigel 3
3201 nigel 39 /* Matched */
3202    
3203 nigel 3 if (count >= 0)
3204     {
3205 nigel 93 int i, maxcount;
3206    
3207     #if !defined NODFA
3208     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3209     #endif
3210     maxcount = use_size_offsets/3;
3211    
3212     /* This is a check against a lunatic return value. */
3213    
3214     if (count > maxcount)
3215     {
3216     fprintf(outfile,
3217     "** PCRE error: returned count %d is too big for offset size %d\n",
3218     count, use_size_offsets);
3219     count = use_size_offsets/3;
3220     if (do_g || do_G)
3221     {
3222     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3223     do_g = do_G = FALSE; /* Break g/G loop */
3224     }
3225     }
3226 ph10 654
3227 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
3228     unset ones at the end. */
3229 ph10 654
3230 ph10 626 if (do_allcaps)
3231     {
3232     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3233 ph10 654 count++; /* Allow for full match */
3234     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3235     }
3236 nigel 93
3237 ph10 626 /* Output the captured substrings */
3238 ph10 654
3239 nigel 29 for (i = 0; i < count * 2; i += 2)
3240 nigel 3 {
3241 nigel 57 if (use_offsets[i] < 0)
3242 ph10 654 {
3243 ph10 626 if (use_offsets[i] != -1)
3244     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3245 ph10 654 use_offsets[i], i);
3246 ph10 626 if (use_offsets[i+1] != -1)
3247     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3248 ph10 654 use_offsets[i+1], i+1);
3249 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
3250 ph10 654 }
3251 nigel 3 else
3252     {
3253     fprintf(outfile, "%2d: ", i/2);
3254 ph10 808 PCHARSV(bptr + use_offsets[i],
3255 nigel 63 use_offsets[i+1] - use_offsets[i], outfile);
3256 nigel 3 fprintf(outfile, "\n");
3257 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3258 nigel 35 {
3259 ph10 616 fprintf(outfile, "%2d+ ", i/2);
3260 ph10 808 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3261 ph10 616 outfile);
3262     fprintf(outfile, "\n");
3263 nigel 35 }
3264 nigel 3 }
3265     }
3266 ph10 512
3267 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3268 nigel 29
3269     for (i = 0; i < 32; i++)
3270     {
3271     if ((copystrings & (1 << i)) != 0)
3272     {
3273 nigel 91 char copybuffer[256];
3274 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3275 nigel 37 i, copybuffer, sizeof(copybuffer));
3276 nigel 29 if (rc < 0)
3277     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3278     else
3279 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
3280 nigel 29 }
3281     }
3282    
3283 nigel 91 for (copynamesptr = copynames;
3284     *copynamesptr != 0;
3285     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3286     {
3287     char copybuffer[256];
3288     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3289     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3290     if (rc < 0)
3291     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3292     else
3293     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3294     }
3295    
3296 nigel 29 for (i = 0; i < 32; i++)
3297     {
3298     if ((getstrings & (1 << i)) != 0)
3299     {
3300     const char *substring;
3301 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
3302 nigel 29 i, &substring);
3303     if (rc < 0)
3304     fprintf(outfile, "get substring %d failed %d\n", i, rc);
3305     else
3306     {
3307     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
3308 nigel 49 pcre_free_substring(substring);
3309 nigel 29 }
3310     }
3311     }
3312    
3313 nigel 91 for (getnamesptr = getnames;
3314     *getnamesptr != 0;
3315     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3316     {
3317     const char *substring;
3318     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3319     count, (char *)getnamesptr, &substring);
3320     if (rc < 0)
3321     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3322     else
3323     {
3324     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
3325     pcre_free_substring(substring);
3326     }
3327     }
3328    
3329 nigel 29 if (getlist)
3330     {
3331     const char **stringlist;
3332 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
3333 nigel 29 &stringlist);
3334     if (rc < 0)
3335     fprintf(outfile, "get substring list failed %d\n", rc);
3336     else
3337     {
3338     for (i = 0; i < count; i++)
3339     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3340     if (stringlist[i] != NULL)
3341     fprintf(outfile, "string list not terminated by NULL\n");
3342 nigel 49 pcre_free_substring_list(stringlist);
3343 nigel 29 }
3344     }
3345 nigel 39 }
3346 nigel 29
3347 nigel 75 /* There was a partial match */
3348    
3349     else if (count == PCRE_ERROR_PARTIAL)
3350     {
3351 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
3352     else fprintf(outfile, "Partial match, mark=%s", markptr);
3353 ph10 426 if (use_size_offsets > 1)
3354     {
3355     fprintf(outfile, ": ");
3356 ph10 808 PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3357 ph10 461 outfile);
3358     }
3359 nigel 77 fprintf(outfile, "\n");
3360 nigel 75 break; /* Out of the /g loop */
3361     }
3362    
3363 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
3364 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
3365     to advance the start offset, and continue. We won't be at the end of the
3366     string - that was checked before setting g_notempty.
3367 nigel 39
3368 ph10 566 Complication arises in the case when the newline convention is "any",
3369 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
3370     terminated by CRLF, an advance of one character just passes the \r,
3371 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
3372 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
3373     newline setting in the pattern; if none was set, use pcre_config() to
3374 ph10 566 find the default.
3375 ph10 144
3376 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
3377     character, not one byte. */
3378    
3379 nigel 3 else
3380     {
3381 nigel 41 if (g_notempty != 0)
3382 nigel 35 {
3383 nigel 73 int onechar = 1;
3384 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
3385 nigel 57 use_offsets[0] = start_offset;
3386 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
3387     {
3388     int d;
3389     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3390 ph10 391 /* Note that these values are always the ASCII ones, even in
3391     EBCDIC environments. CR = 13, NL = 10. */
3392     obits = (d == 13)? PCRE_NEWLINE_CR :
3393     (d == 10)? PCRE_NEWLINE_LF :
3394     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3395 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3396 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
3397     }
3398 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3399 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3400 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3401 ph10 149 &&
3402 ph10 143 start_offset < len - 1 &&
3403     bptr[start_offset] == '\r' &&
3404     bptr[start_offset+1] == '\n')
3405 ph10 144 onechar++;
3406 ph10 810 else if (use_utf)
3407 nigel 73 {
3408     while (start_offset + onechar < len)
3409     {
3410 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3411 ph10 579 onechar++;
3412 nigel 73 }
3413     }
3414     use_offsets[1] = start_offset + onechar;
3415 nigel 35 }
3416 nigel 41 else
3417     {
3418 ph10 598 switch(count)
3419 ph10 654 {
3420 ph10 598 case PCRE_ERROR_NOMATCH:
3421 ph10 512 if (gmatched == 0)
3422 ph10 510 {
3423     if (markptr == NULL) fprintf(outfile, "No match\n");
3424     else fprintf(outfile, "No match, mark = %s\n", markptr);
3425 ph10 512 }
3426 ph10 598 break;
3427 ph10 654
3428 ph10 598 case PCRE_ERROR_BADUTF8:
3429     case PCRE_ERROR_SHORTUTF8:
3430     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3431     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3432     if (use_size_offsets >= 2)
3433 ph10 654 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3434 ph10 598 use_offsets[1]);
3435 ph10 654 fprintf(outfile, "\n");
3436     break;
3437    
3438 ph10 598 default:
3439 ph10 654 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3440 ph10 604 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3441 ph10 654 else
3442     fprintf(outfile, "Error %d (Unexpected value)\n", count);
3443 ph10 598 break;
3444 nigel 41 }
3445 ph10 654
3446 nigel 41 break; /* Out of the /g loop */
3447     }
3448 nigel 3 }
3449 nigel 35
3450 nigel 39 /* If not /g or /G we are done */
3451    
3452     if (!do_g && !do_G) break;
3453    
3454 nigel 41 /* If we have matched an empty string, first check to see if we are at
3455 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3456     Perl's /g option does. This turns out to be rather cunning. First we set
3457     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3458 nigel 47 same point. If this fails (picked up above) we advance to the next
3459 ph10 143 character. */
3460 ph10 142
3461 nigel 41 g_notempty = 0;
3462 ph10 142
3463 nigel 57 if (use_offsets[0] == use_offsets[1])
3464 nigel 41 {
3465 nigel 57 if (use_offsets[0] == len) break;
3466 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3467 nigel 41 }
3468 nigel 39
3469     /* For /g, update the start offset, leaving the rest alone */
3470    
3471 ph10 143 if (do_g) start_offset = use_offsets[1];
3472 nigel 39
3473     /* For /G, update the pointer and length */
3474    
3475     else
3476 nigel 35 {
3477 ph10 143 bptr += use_offsets[1];
3478     len -= use_offsets[1];
3479 nigel 35 }
3480 nigel 39 } /* End of loop for /g and /G */
3481 nigel 91
3482     NEXT_DATA: continue;
3483 nigel 39 } /* End of loop for data lines */
3484 nigel 3
3485 nigel 11 CONTINUE:
3486 nigel 37
3487     #if !defined NOPOSIX
3488 nigel 3 if (posix || do_posix) regfree(&preg);
3489 nigel 37 #endif
3490    
3491 nigel 77 if (re != NULL) new_free(re);
3492 zherczeg 809 if (extra != NULL)
3493     {
3494     PCRE_FREE_STUDY(extra);
3495     }
3496 ph10 541 if (locale_set)
3497 nigel 25 {
3498 nigel 77 new_free((void *)tables);
3499 nigel 25 setlocale(LC_CTYPE, "C");
3500 nigel 93 locale_set = 0;
3501 nigel 25 }
3502 ph10 691 if (jit_stack != NULL)
3503 ph10 667 {
3504     pcre_jit_stack_free(jit_stack);
3505 ph10 691 jit_stack = NULL;
3506     }
3507 nigel 3 }
3508    
3509 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3510 nigel 77
3511     EXIT:
3512    
3513     if (infile != NULL && infile != stdin) fclose(infile);
3514     if (outfile != NULL && outfile != stdout) fclose(outfile);
3515    
3516     free(buffer);
3517     free(dbuffer);
3518     free(pbuffer);
3519     free(offsets);
3520    
3521 ph10 805 #ifdef SUPPORT_PCRE16
3522     if (buffer16 != NULL) free(buffer16);
3523     #endif
3524    
3525 nigel 77 return yield;
3526 nigel 3 }
3527    
3528 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12