/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 809 - (hide annotations) (download)
Mon Dec 19 11:04:45 2011 UTC (2 years, 9 months ago) by zherczeg
File MIME type: text/plain
File size: 103658 byte(s)
fixing existing and adding new byte-order related functions
1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 ph10 808 been extended and consequently is now rather, er, *very* untidy in places. The
8     addition of 16-bit support has made it even worse. :-(
9 nigel 63
10 nigel 75 -----------------------------------------------------------------------------
11     Redistribution and use in source and binary forms, with or without
12     modification, are permitted provided that the following conditions are met:
13    
14     * Redistributions of source code must retain the above copyright notice,
15     this list of conditions and the following disclaimer.
16    
17     * Redistributions in binary form must reproduce the above copyright
18     notice, this list of conditions and the following disclaimer in the
19     documentation and/or other materials provided with the distribution.
20    
21     * Neither the name of the University of Cambridge nor the names of its
22     contributors may be used to endorse or promote products derived from
23     this software without specific prior written permission.
24    
25     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
26     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
28     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
29     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
32     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
33     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
34     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
35     POSSIBILITY OF SUCH DAMAGE.
36     -----------------------------------------------------------------------------
37     */
38    
39    
40 ph10 200 #ifdef HAVE_CONFIG_H
41 ph10 236 #include "config.h"
42 ph10 200 #endif
43 ph10 199
44 nigel 3 #include <ctype.h>
45     #include <stdio.h>
46     #include <string.h>
47     #include <stdlib.h>
48     #include <time.h>
49 nigel 25 #include <locale.h>
50 nigel 75 #include <errno.h>
51 nigel 3
52 ph10 287 #ifdef SUPPORT_LIBREADLINE
53 ph10 343 #ifdef HAVE_UNISTD_H
54 ph10 287 #include <unistd.h>
55 ph10 343 #endif
56 ph10 287 #include <readline/readline.h>
57     #include <readline/history.h>
58     #endif
59 nigel 93
60 ph10 287
61 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
62     input and output without "b"; then I was told that "b" was needed in some
63     environments, so it was added for release 5.0 to both the input and output. (It
64     makes no difference on Unix-like systems.) Later I was told that it is wrong
65     for the input on Windows. I've now abstracted the modes into two macros that
66     are set here, to make it easier to fiddle with them, and removed "b" from the
67     input mode under Windows. */
68    
69     #if defined(_WIN32) || defined(WIN32)
70     #include <io.h> /* For _setmode() */
71     #include <fcntl.h> /* For _O_BINARY */
72     #define INPUT_MODE "r"
73     #define OUTPUT_MODE "wb"
74    
75 ph10 411 #ifndef isatty
76     #define isatty _isatty /* This is what Windows calls them, I'm told, */
77     #endif /* though in some environments they seem to */
78     /* be already defined, hence the #ifndefs. */
79     #ifndef fileno
80 ph10 343 #define fileno _fileno
81 ph10 411 #endif
82 ph10 343
83 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85     #ifdef __BORLANDC__
86     #define _setmode(handle, mode) setmode(handle, mode)
87     #endif
88    
89     /* Not Windows */
90    
91 nigel 93 #else
92     #include <sys/time.h> /* These two includes are needed */
93     #include <sys/resource.h> /* for setrlimit(). */
94     #define INPUT_MODE "rb"
95     #define OUTPUT_MODE "wb"
96 nigel 91 #endif
97    
98 nigel 93
99 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
100     displaying the results of pcre_study() and we also need to know about the
101     internal macros, structures, and other internal data values; pcretest has
102     "inside information" compared to a program that strictly follows the PCRE API.
103 nigel 37
104 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
105     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
106     appropriately for an application, not for building PCRE. */
107 nigel 77
108 ph10 145 #include "pcre.h"
109 nigel 77 #include "pcre_internal.h"
110    
111 ph10 808 /* The pcre_printint() function, which prints the internal form of a compiled
112     regex, is held in a separate file so that (a) it can be compiled in either
113     8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114 ph10 805 when that is compiled in debug mode. */
115    
116     #ifdef SUPPORT_PCRE8
117     void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118     #endif
119     #ifdef SUPPORT_PCRE16
120     void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121     #endif
122    
123 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
124     to keep two copies, we include the source file here, changing the names of the
125     external symbols to prevent clashes. */
126 nigel 77
127 ph10 351 #define _pcre_ucp_gentype ucp_gentype
128 ph10 667 #define _pcre_ucp_typerange ucp_typerange
129 nigel 85 #define _pcre_utf8_table1 utf8_table1
130     #define _pcre_utf8_table1_size utf8_table1_size
131     #define _pcre_utf8_table2 utf8_table2
132     #define _pcre_utf8_table3 utf8_table3
133     #define _pcre_utf8_table4 utf8_table4
134     #define _pcre_utt utt
135     #define _pcre_utt_size utt_size
136 ph10 240 #define _pcre_utt_names utt_names
137 nigel 85 #define _pcre_OP_lengths OP_lengths
138    
139     #include "pcre_tables.c"
140    
141 ph10 498 /* The definition of the macro PRINTABLE, which determines whether to print an
142 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
143 ph10 805 the same as in the printint.src file. We use it here in cases when the locale
144     has not been explicitly changed, so as to get consistent output from systems
145     that differ in their output from isprint() even in the "C" locale. */
146 nigel 93
/* PRINTABLE(c) is the locale-independent test for a printing character: a
fixed range of code values, chosen according to whether EBCDIC is defined. */

#ifdef EBCDIC
#define PRINTABLE(c) ((c) >= 64 && (c) < 255)
#else
#define PRINTABLE(c) ((c) >= 32 && (c) < 127)
#endif

/* PRINTOK(c) yields non-zero if c should be output as-is. When locale_set is
zero the fixed PRINTABLE() range is used; otherwise isprint() decides. The C
library only defines isprint() for EOF and for values representable as an
unsigned char, so any value outside 0..255 (possible for 16-bit characters) is
reported as not printable instead of being handed to isprint(). Note that the
argument is evaluated more than once, so it must not have side effects. */

#define PRINTOK(c) \
  (locale_set? ((c) >= 0 && (c) <= 255 && isprint(c)) : PRINTABLE(c))
154 nigel 85
155 nigel 37 /* It is possible to compile this test program without including support for
156     testing the POSIX interface, though this is not available via the standard
157     Makefile. */
158    
159     #if !defined NOPOSIX
160 nigel 3 #include "pcreposix.h"
161 nigel 37 #endif
162 nigel 3
163 ph10 808 /* It is also possible, originally for the benefit of a version that was
164     imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165     without the interface to the DFA matcher (NODFA), and without the doublecheck
166     of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167     out the UTF8 support if PCRE is built without it. */
168 nigel 79
169 ph10 107 #ifndef SUPPORT_UTF8
170     #ifndef NOUTF8
171     #define NOUTF8
172     #endif
173     #endif
174 nigel 79
175 ph10 808 /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176     for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177     only from one place and is handled differently). I couldn't dream up any way of
178     using a single macro to do this in a generic way, because of the many different
179     argument requirements. We know that at least one of SUPPORT_PCRE8 and
180     SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181     use these in the definitions of generic macros. */
182 ph10 107
/* 8-bit mode variants of the function call macros. Each one expands to a call
of the corresponding 8-bit function (pchars, pcre_compile, pcre_exec,
pcre_study, pcre_free_study). Apart from PCHARSV8 and PCRE_FREE_STUDY8, the
expansion assigns the function's result to the macro's first argument.

The pointer arguments p, pat and bptr are cast in the expansion without being
parenthesized, so when an expression such as "a + b" is passed, the cast binds
to "a" only. NOTE(review): check how callers pass these arguments before adding
parentheses, because that would change what is computed. */

183 ph10 808 #ifdef SUPPORT_PCRE8
184     #define PCHARS8(lv, p, len, f) \
185     lv = pchars((pcre_uint8 *)p, len, f)
186    
187     #define PCHARSV8(p, len, f) \
188     (void)pchars((pcre_uint8 *)p, len, f)
189    
190     #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191     re = pcre_compile((char *)pat, options, error, erroffset, tables)
192    
193     #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194     offsets, size_offsets) \
195     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196     offsets, size_offsets)
197    
198     #define PCRE_STUDY8(extra, re, options, error) \
199     extra = pcre_study(re, options, error)
200    
201 zherczeg 809 #define PCRE_FREE_STUDY8(extra) \
202     pcre_free_study(extra)
203 ph10 808
204 zherczeg 809 #endif /* SUPPORT_PCRE8 */
205    
206    
/* 16-bit mode variants of the function call macros. Each one expands to a call
of the corresponding 16-bit function (pchars16, pcre16_compile, pcre16_exec,
pcre16_study, pcre16_free_study). Apart from PCHARSV16 and PCRE_FREE_STUDY16,
the expansion assigns the function's result to the macro's first argument.

The pointer arguments p, pat and bptr are cast to PCRE_SPTR16 without being
parenthesized, so when an expression such as "a + b" is passed, the cast binds
to "a" only and the addition is then done on the PCRE_SPTR16 pointer.
NOTE(review): presumably that means "b" counts 16-bit units rather than bytes;
confirm against the callers before adding parentheses, because that would
change what is computed. */

207 ph10 808 #ifdef SUPPORT_PCRE16
208     #define PCHARS16(lv, p, len, f) \
209     lv = pchars16((PCRE_SPTR16)p, len, f)
210    
211     #define PCHARSV16(p, len, f) \
212     (void)pchars16((PCRE_SPTR16)p, len, f)
213    
214     #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
215     re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
216    
217     #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
218     offsets, size_offsets) \
219     count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
220     options, offsets, size_offsets)
221    
222     #define PCRE_STUDY16(extra, re, options, error) \
223     extra = pcre16_study(re, options, error)
224    
225 zherczeg 809 #define PCRE_FREE_STUDY16(extra) \
226     pcre16_free_study(extra)
227 ph10 808
228 zherczeg 809 #endif /* SUPPORT_PCRE16 */
229    
230    
231 ph10 808 /* ----- Both modes are supported; a runtime test is needed ----- */
232    
#if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16

/* When both libraries are available, each generic macro tests use_pcre16 at
run time and invokes either the 16-bit or the 8-bit variant with exactly the
arguments it was given. The expansion is wrapped in do { ... } while (0) so
that a macro call followed by a semicolon is always exactly one statement; a
bare if/else expansion is fragile when the call is itself the body of an
unbraced if or loop, and provokes "ambiguous else" compiler warnings. The
macros must be used as statements, not as expressions. */

#define PCHARS(lv, p, len, f) \
  do \
    { \
    if (use_pcre16) \
      PCHARS16(lv, p, len, f); \
    else \
      PCHARS8(lv, p, len, f); \
    } \
  while (0)

#define PCHARSV(p, len, f) \
  do \
    { \
    if (use_pcre16) \
      PCHARSV16(p, len, f); \
    else \
      PCHARSV8(p, len, f); \
    } \
  while (0)

#define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
  do \
    { \
    if (use_pcre16) \
      PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
    else \
      PCRE_COMPILE8(re, pat, options, error, erroffset, tables); \
    } \
  while (0)

#define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
    offsets, size_offsets) \
  do \
    { \
    if (use_pcre16) \
      PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    else \
      PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
        offsets, size_offsets); \
    } \
  while (0)

#define PCRE_STUDY(extra, re, options, error) \
  do \
    { \
    if (use_pcre16) \
      PCRE_STUDY16(extra, re, options, error); \
    else \
      PCRE_STUDY8(extra, re, options, error); \
    } \
  while (0)

#define PCRE_FREE_STUDY(extra) \
  do \
    { \
    if (use_pcre16) \
      PCRE_FREE_STUDY16(extra); \
    else \
      PCRE_FREE_STUDY8(extra); \
    } \
  while (0)

/* ----- Only 8-bit mode is supported ----- */

/* With a single library there is nothing to test at run time: the generic
names are simply aliases for the variants of the one supported mode. */

#elif defined SUPPORT_PCRE8
#define PCHARS           PCHARS8
#define PCHARSV          PCHARSV8
#define PCRE_COMPILE     PCRE_COMPILE8
#define PCRE_EXEC        PCRE_EXEC8
#define PCRE_STUDY       PCRE_STUDY8
#define PCRE_FREE_STUDY  PCRE_FREE_STUDY8

/* ----- Only 16-bit mode is supported ----- */

#else
#define PCHARS           PCHARS16
#define PCHARSV          PCHARSV16
#define PCRE_COMPILE     PCRE_COMPILE16
#define PCRE_EXEC        PCRE_EXEC16
#define PCRE_STUDY       PCRE_STUDY16
#define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
#endif
294    
295     /* ----- End of mode-specific function call macros ----- */
296    
297    
298 nigel 85 /* Other parameters */
299    
300 nigel 3 #ifndef CLOCKS_PER_SEC
301     #ifdef CLK_TCK
302     #define CLOCKS_PER_SEC CLK_TCK
303     #else
304     #define CLOCKS_PER_SEC 100
305     #endif
306     #endif
307    
308 nigel 93 /* This is the default loop count for timing. */
309    
310 nigel 75 #define LOOPREPEAT 500000
311 nigel 3
312 nigel 85 /* Static variables */
313    
314 nigel 3 static FILE *outfile;
315     static int log_store = 0;
316 nigel 63 static int callout_count;
317     static int callout_extra;
318     static int callout_fail_count;
319     static int callout_fail_id;
320 ph10 210 static int debug_lengths;
321 nigel 63 static int first_callout;
322 nigel 93 static int locale_set = 0;
323 nigel 73 static int show_malloc;
324 nigel 67 static int use_utf8;
325 nigel 43 static size_t gotten_store;
326 ph10 801 static size_t first_gotten_store = 0;
327 ph10 645 static const unsigned char *last_callout_mark = NULL;
328 nigel 3
329 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
330    
331     static int buffer_size = 50000;
332 ph10 756 static pcre_uint8 *buffer = NULL;
333     static pcre_uint8 *dbuffer = NULL;
334     static pcre_uint8 *pbuffer = NULL;
335 nigel 3
336 ph10 805 #ifdef SUPPORT_PCRE16
337     static int buffer16_size = 0;
338     static pcre_uint16 *buffer16 = NULL;
339     #endif
340    
341 ph10 808 /* If we have 8-bit support, default use_pcre16 to false; if there is also
342     16-bit support, it can be changed by an option. If there is no 8-bit support,
343     there must be 16-bit support, so default it to 1. */
344    
345     #ifdef SUPPORT_PCRE8
346     static int use_pcre16 = 0;
347     #else
348     static int use_pcre16 = 1;
349     #endif
350    
351 ph10 598 /* Textual explanations for runtime error codes */
352 nigel 75
353 ph10 598 static const char *errtexts[] = {
354     NULL, /* 0 is no error */
355     NULL, /* NOMATCH is handled specially */
356     "NULL argument passed",
357     "bad option value",
358     "magic number missing",
359     "unknown opcode - pattern overwritten?",
360     "no more memory",
361 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
362 ph10 598 "match limit exceeded",
363     "callout error code",
364     NULL, /* BADUTF8 is handled specially */
365     "bad UTF-8 offset",
366     NULL, /* PARTIAL is handled specially */
367     "not used - internal error",
368     "internal error - pattern overwritten?",
369     "bad count value",
370     "item unsupported for DFA matching",
371     "backreference condition or recursion test not supported for DFA matching",
372     "match limit not supported for DFA matching",
373     "workspace size exceeded in DFA matching",
374 ph10 654 "too much recursion for DFA matching",
375 ph10 598 "recursion limit exceeded",
376     "not used - internal error",
377     "invalid combination of newline options",
378     "bad offset value",
379 ph10 642 NULL, /* SHORTUTF8 is handled specially */
380 ph10 676 "nested recursion at the same subject position",
381 ph10 805 "JIT stack limit reached",
382 ph10 808 "pattern compiled in wrong mode (8-bit/16-bit error)"
383 ph10 598 };
384    
385 ph10 654
386 ph10 541 /*************************************************
387     * Alternate character tables *
388     *************************************************/
389 nigel 49
390 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
391     using the default tables of the library. However, the T option can be used to
392     select alternate sets of tables, for different kinds of testing. Note also that
393 ph10 541 the L (locale) option also adjusts the tables. */
394    
395 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
396 ph10 541 only ASCII characters. */
397    
398 ph10 808 static const pcre_uint8 tables0[] = {
399 ph10 541
400     /* This table is a lower casing table. */
401    
402     0, 1, 2, 3, 4, 5, 6, 7,
403     8, 9, 10, 11, 12, 13, 14, 15,
404     16, 17, 18, 19, 20, 21, 22, 23,
405     24, 25, 26, 27, 28, 29, 30, 31,
406     32, 33, 34, 35, 36, 37, 38, 39,
407     40, 41, 42, 43, 44, 45, 46, 47,
408     48, 49, 50, 51, 52, 53, 54, 55,
409     56, 57, 58, 59, 60, 61, 62, 63,
410     64, 97, 98, 99,100,101,102,103,
411     104,105,106,107,108,109,110,111,
412     112,113,114,115,116,117,118,119,
413     120,121,122, 91, 92, 93, 94, 95,
414     96, 97, 98, 99,100,101,102,103,
415     104,105,106,107,108,109,110,111,
416     112,113,114,115,116,117,118,119,
417     120,121,122,123,124,125,126,127,
418     128,129,130,131,132,133,134,135,
419     136,137,138,139,140,141,142,143,
420     144,145,146,147,148,149,150,151,
421     152,153,154,155,156,157,158,159,
422     160,161,162,163,164,165,166,167,
423     168,169,170,171,172,173,174,175,
424     176,177,178,179,180,181,182,183,
425     184,185,186,187,188,189,190,191,
426     192,193,194,195,196,197,198,199,
427     200,201,202,203,204,205,206,207,
428     208,209,210,211,212,213,214,215,
429     216,217,218,219,220,221,222,223,
430     224,225,226,227,228,229,230,231,
431     232,233,234,235,236,237,238,239,
432     240,241,242,243,244,245,246,247,
433     248,249,250,251,252,253,254,255,
434    
435     /* This table is a case flipping table. */
436    
437     0, 1, 2, 3, 4, 5, 6, 7,
438     8, 9, 10, 11, 12, 13, 14, 15,
439     16, 17, 18, 19, 20, 21, 22, 23,
440     24, 25, 26, 27, 28, 29, 30, 31,
441     32, 33, 34, 35, 36, 37, 38, 39,
442     40, 41, 42, 43, 44, 45, 46, 47,
443     48, 49, 50, 51, 52, 53, 54, 55,
444     56, 57, 58, 59, 60, 61, 62, 63,
445     64, 97, 98, 99,100,101,102,103,
446     104,105,106,107,108,109,110,111,
447     112,113,114,115,116,117,118,119,
448     120,121,122, 91, 92, 93, 94, 95,
449     96, 65, 66, 67, 68, 69, 70, 71,
450     72, 73, 74, 75, 76, 77, 78, 79,
451     80, 81, 82, 83, 84, 85, 86, 87,
452     88, 89, 90,123,124,125,126,127,
453     128,129,130,131,132,133,134,135,
454     136,137,138,139,140,141,142,143,
455     144,145,146,147,148,149,150,151,
456     152,153,154,155,156,157,158,159,
457     160,161,162,163,164,165,166,167,
458     168,169,170,171,172,173,174,175,
459     176,177,178,179,180,181,182,183,
460     184,185,186,187,188,189,190,191,
461     192,193,194,195,196,197,198,199,
462     200,201,202,203,204,205,206,207,
463     208,209,210,211,212,213,214,215,
464     216,217,218,219,220,221,222,223,
465     224,225,226,227,228,229,230,231,
466     232,233,234,235,236,237,238,239,
467     240,241,242,243,244,245,246,247,
468     248,249,250,251,252,253,254,255,
469    
470     /* This table contains bit maps for various character classes. Each map is 32
471     bytes long and the bits run from the least significant end of each byte. The
472     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
473     graph, print, punct, and cntrl. Other classes are built from combinations. */
474    
475     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
476     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
477     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
478     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
479    
480     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
481     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
482     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
483     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
484    
485     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
486     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
487     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
488     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
489    
490     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
492     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
494    
495     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
496     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
497     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
499    
500     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
501     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
502     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
504    
505     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
506     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
507     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
509    
510     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
511     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
512     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
514    
515     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
516     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
517     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519    
520     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
521     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
522     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
524    
525     /* This table identifies various classes of character by individual bits:
526     0x01 white space character
527     0x02 letter
528     0x04 decimal digit
529     0x08 hexadecimal digit
530     0x10 alphanumeric or '_'
531     0x80 regular expression metacharacter or binary zero
532     */
533    
534     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
535     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
536     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
537     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
538     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
539     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
540     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
541     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
542     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
543     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
544     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
545     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
546     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
547     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
548     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
549     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
550     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
551     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
552     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
553     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
554     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
555     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
556     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
557     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
558     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
559     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
560     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
561     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
562     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
563     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
564     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
565     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
566    
567 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
568     be at least an approximation of ISO 8859. In particular, there are characters
569 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
570    
571 ph10 808 static const pcre_uint8 tables1[] = {
572 ph10 541 0,1,2,3,4,5,6,7,
573     8,9,10,11,12,13,14,15,
574     16,17,18,19,20,21,22,23,
575     24,25,26,27,28,29,30,31,
576     32,33,34,35,36,37,38,39,
577     40,41,42,43,44,45,46,47,
578     48,49,50,51,52,53,54,55,
579     56,57,58,59,60,61,62,63,
580     64,97,98,99,100,101,102,103,
581     104,105,106,107,108,109,110,111,
582     112,113,114,115,116,117,118,119,
583     120,121,122,91,92,93,94,95,
584     96,97,98,99,100,101,102,103,
585     104,105,106,107,108,109,110,111,
586     112,113,114,115,116,117,118,119,
587     120,121,122,123,124,125,126,127,
588     128,129,130,131,132,133,134,135,
589     136,137,138,139,140,141,142,143,
590     144,145,146,147,148,149,150,151,
591     152,153,154,155,156,157,158,159,
592     160,161,162,163,164,165,166,167,
593     168,169,170,171,172,173,174,175,
594     176,177,178,179,180,181,182,183,
595     184,185,186,187,188,189,190,191,
596     224,225,226,227,228,229,230,231,
597     232,233,234,235,236,237,238,239,
598     240,241,242,243,244,245,246,215,
599     248,249,250,251,252,253,254,223,
600     224,225,226,227,228,229,230,231,
601     232,233,234,235,236,237,238,239,
602     240,241,242,243,244,245,246,247,
603     248,249,250,251,252,253,254,255,
604     0,1,2,3,4,5,6,7,
605     8,9,10,11,12,13,14,15,
606     16,17,18,19,20,21,22,23,
607     24,25,26,27,28,29,30,31,
608     32,33,34,35,36,37,38,39,
609     40,41,42,43,44,45,46,47,
610     48,49,50,51,52,53,54,55,
611     56,57,58,59,60,61,62,63,
612     64,97,98,99,100,101,102,103,
613     104,105,106,107,108,109,110,111,
614     112,113,114,115,116,117,118,119,
615     120,121,122,91,92,93,94,95,
616     96,65,66,67,68,69,70,71,
617     72,73,74,75,76,77,78,79,
618     80,81,82,83,84,85,86,87,
619     88,89,90,123,124,125,126,127,
620     128,129,130,131,132,133,134,135,
621     136,137,138,139,140,141,142,143,
622     144,145,146,147,148,149,150,151,
623     152,153,154,155,156,157,158,159,
624     160,161,162,163,164,165,166,167,
625     168,169,170,171,172,173,174,175,
626     176,177,178,179,180,181,182,183,
627     184,185,186,187,188,189,190,191,
628     224,225,226,227,228,229,230,231,
629     232,233,234,235,236,237,238,239,
630     240,241,242,243,244,245,246,215,
631     248,249,250,251,252,253,254,223,
632     192,193,194,195,196,197,198,199,
633     200,201,202,203,204,205,206,207,
634     208,209,210,211,212,213,214,247,
635     216,217,218,219,220,221,222,255,
636     0,62,0,0,1,0,0,0,
637     0,0,0,0,0,0,0,0,
638     32,0,0,0,1,0,0,0,
639     0,0,0,0,0,0,0,0,
640     0,0,0,0,0,0,255,3,
641     126,0,0,0,126,0,0,0,
642     0,0,0,0,0,0,0,0,
643     0,0,0,0,0,0,0,0,
644     0,0,0,0,0,0,255,3,
645     0,0,0,0,0,0,0,0,
646     0,0,0,0,0,0,12,2,
647     0,0,0,0,0,0,0,0,
648     0,0,0,0,0,0,0,0,
649     254,255,255,7,0,0,0,0,
650     0,0,0,0,0,0,0,0,
651     255,255,127,127,0,0,0,0,
652     0,0,0,0,0,0,0,0,
653     0,0,0,0,254,255,255,7,
654     0,0,0,0,0,4,32,4,
655     0,0,0,128,255,255,127,255,
656     0,0,0,0,0,0,255,3,
657     254,255,255,135,254,255,255,7,
658     0,0,0,0,0,4,44,6,
659     255,255,127,255,255,255,127,255,
660     0,0,0,0,254,255,255,255,
661     255,255,255,255,255,255,255,127,
662     0,0,0,0,254,255,255,255,
663     255,255,255,255,255,255,255,255,
664     0,2,0,0,255,255,255,255,
665     255,255,255,255,255,255,255,127,
666     0,0,0,0,255,255,255,255,
667     255,255,255,255,255,255,255,255,
668     0,0,0,0,254,255,0,252,
669     1,0,0,248,1,0,0,120,
670     0,0,0,0,254,255,255,255,
671     0,0,128,0,0,0,128,0,
672     255,255,255,255,0,0,0,0,
673     0,0,0,0,0,0,0,128,
674     255,255,255,255,0,0,0,0,
675     0,0,0,0,0,0,0,0,
676     128,0,0,0,0,0,0,0,
677     0,1,1,0,1,1,0,0,
678     0,0,0,0,0,0,0,0,
679     0,0,0,0,0,0,0,0,
680     1,0,0,0,128,0,0,0,
681     128,128,128,128,0,0,128,0,
682     28,28,28,28,28,28,28,28,
683     28,28,0,0,0,0,0,128,
684     0,26,26,26,26,26,26,18,
685     18,18,18,18,18,18,18,18,
686     18,18,18,18,18,18,18,18,
687     18,18,18,128,128,0,128,16,
688     0,26,26,26,26,26,26,18,
689     18,18,18,18,18,18,18,18,
690     18,18,18,18,18,18,18,18,
691     18,18,18,128,128,0,0,0,
692     0,0,0,0,0,1,0,0,
693     0,0,0,0,0,0,0,0,
694     0,0,0,0,0,0,0,0,
695     0,0,0,0,0,0,0,0,
696     1,0,0,0,0,0,0,0,
697     0,0,18,0,0,0,0,0,
698     0,0,20,20,0,18,0,0,
699     0,20,18,0,0,0,0,0,
700     18,18,18,18,18,18,18,18,
701     18,18,18,18,18,18,18,18,
702     18,18,18,18,18,18,18,0,
703     18,18,18,18,18,18,18,18,
704     18,18,18,18,18,18,18,18,
705     18,18,18,18,18,18,18,18,
706     18,18,18,18,18,18,18,0,
707     18,18,18,18,18,18,18,18
708     };
709    
710    
711    
712 ph10 558
713     #ifndef HAVE_STRERROR
714 nigel 49 /*************************************************
715 ph10 558 * Provide strerror() for non-ANSI libraries *
716     *************************************************/
717    
718     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
719     in their libraries, but can provide the same facility by this simple
720     alternative function. */
721    
extern int sys_nerr;
extern char *sys_errlist[];

/* Replacement strerror(): look the error number up in the system's
sys_errlist[] table, which has sys_nerr entries. A number outside that table
yields a fixed "unknown" text instead. */

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
731     #endif /* HAVE_STRERROR */
732    
733    
734 ph10 667 /*************************************************
735     * JIT memory callback *
736     *************************************************/
737 ph10 558
738 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
739     {
740     return (pcre_jit_stack *)arg;
741     }
742 ph10 558
743 ph10 667
744 ph10 808 /*************************************************
745     * Convert UTF-8 string to value *
746     *************************************************/
747    
748     /* This function takes one or more bytes that represents a UTF-8 character,
749     and returns the value of the character.
750    
751     Argument:
752     utf8bytes a pointer to the byte vector
753     vptr a pointer to an int to receive the value
754    
755     Returns: > 0 => the number of bytes consumed
756     -6 to 0 => malformed UTF-8 character at offset = (-return)
757     */
758    
759     #if !defined NOUTF8
760    
761     static int
762     utf82ord(pcre_uint8 *utf8bytes, int *vptr)
763     {
764     int c = *utf8bytes++;
765     int d = c;
766     int i, j, s;
767    
768     for (i = -1; i < 6; i++) /* i is number of additional bytes */
769     {
770     if ((d & 0x80) == 0) break;
771     d <<= 1;
772     }
773    
774     if (i == -1) { *vptr = c; return 1; } /* ascii character */
775     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
776    
777     /* i now has a value in the range 1-5 */
778    
779     s = 6*i;
780     d = (c & utf8_table3[i]) << s;
781    
782     for (j = 0; j < i; j++)
783     {
784     c = *utf8bytes++;
785     if ((c & 0xc0) != 0x80) return -(j+1);
786     s -= 6;
787     d |= (c & 0x3f) << s;
788     }
789    
790     /* Check that encoding was the correct unique one */
791    
792     for (j = 0; j < utf8_table1_size; j++)
793     if (d <= utf8_table1[j]) break;
794     if (j != i) return -(i+1);
795    
796     /* Valid value */
797    
798     *vptr = d;
799     return i+1;
800     }
801    
802     #endif
803    
804    
805    
806     /*************************************************
807     * Convert character value to UTF-8 *
808     *************************************************/
809    
810     /* This function takes an integer value in the range 0 - 0x7fffffff
811     and encodes it as a UTF-8 character in 1 to 6 bytes.
812    
813     Arguments:
814     cvalue the character value
815     utf8bytes pointer to buffer for result - at least 6 bytes long
816    
817     Returns: number of bytes placed in the buffer
818     */
819    
820     #if !defined NOUTF8
821    
822     static int
823     ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
824     {
825     register int i, j;
826     for (i = 0; i < utf8_table1_size; i++)
827     if (cvalue <= utf8_table1[i]) break;
828     utf8bytes += i;
829     for (j = i; j > 0; j--)
830     {
831     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
832     cvalue >>= 6;
833     }
834     *utf8bytes = utf8_table2[i] | cvalue;
835     return i + 1;
836     }
837    
838     #endif
839    
840    
841    
842 ph10 805 #ifdef SUPPORT_PCRE16
843 ph10 558 /*************************************************
844 ph10 805 * Convert a string to 16-bit *
845     *************************************************/
846    
847 ph10 808 /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
848     8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
849     double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
850     in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
851     result is always left in buffer16. */
852 ph10 805
853     static int
854 ph10 808 to16(pcre_uint8 *p, int utf, int len)
855 ph10 805 {
856     pcre_uint16 *pp;
857    
858 ph10 808 if (buffer16_size < 2*len + 2)
859 ph10 805 {
860     if (buffer16 != NULL) free(buffer16);
861 ph10 808 buffer16_size = 2*len + 2;
862 ph10 805 buffer16 = (pcre_uint16 *)malloc(buffer16_size);
863 ph10 808 if (buffer16 == NULL)
864 ph10 805 {
865     fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
866     exit(1);
867     }
868     }
869 ph10 808
870 ph10 805 pp = buffer16;
871    
872     if (!utf)
873     {
874 ph10 808 while (len-- > 0) *pp++ = *p++;
875 ph10 805 }
876 ph10 808
877 ph10 805 else
878     {
879 ph10 808 int c;
880     while (len > 0)
881     {
882     int chlen = utf82ord(p, &c);
883     p += chlen;
884     len -= chlen;
885     if (c < 0x10000) *pp++ = c; else
886     {
887     c -= 0x10000;
888     *pp++ = 0xD800 | (c >> 10);
889     *pp++ = 0xDC00 | (c & 0x3ff);
890     }
891     }
892     }
893    
894     *pp = 0;
895 ph10 805 return pp - buffer16;
896 ph10 808 }
897 ph10 805 #endif
898    
899    
900     /*************************************************
901 nigel 91 * Read or extend an input line *
902     *************************************************/
903    
904     /* Input lines are read into buffer, but both patterns and data lines can be
905     continued over multiple input lines. In addition, if the buffer fills up, we
906     want to automatically expand it so as to be able to handle extremely large
907     lines that are needed for certain stress tests. When the input buffer is
908     expanded, the other two buffers must also be expanded likewise, and the
909     contents of pbuffer, which are a copy of the input for callouts, must be
910     preserved (for when expansion happens for a data line). This is not the most
911     optimal way of handling this, but hey, this is just a test program!
912    
913     Arguments:
914     f the file to read
915     start where in buffer to start (this *must* be within buffer)
916 ph10 287 prompt for stdin or readline()
917 nigel 91
918     Returns: pointer to the start of new data
919     could be a copy of start, or could be moved
920     NULL if no data read and EOF reached
921     */
922    
923 ph10 756 static pcre_uint8 *
924     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
925 nigel 91 {
926 ph10 756 pcre_uint8 *here = start;
927 nigel 91
928     for (;;)
929     {
930 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
931 nigel 93
932 nigel 91 if (rlen > 1000)
933     {
934     int dlen;
935 ph10 289
936 ph10 287 /* If libreadline support is required, use readline() to read a line if the
937     input is a terminal. Note that readline() removes the trailing newline, so
938     we must put it back again, to be compatible with fgets(). */
939 ph10 289
940 ph10 287 #ifdef SUPPORT_LIBREADLINE
941     if (isatty(fileno(f)))
942     {
943 ph10 289 size_t len;
944 ph10 287 char *s = readline(prompt);
945     if (s == NULL) return (here == start)? NULL : start;
946     len = strlen(s);
947 ph10 289 if (len > 0) add_history(s);
948 ph10 287 if (len > rlen - 1) len = rlen - 1;
949     memcpy(here, s, len);
950     here[len] = '\n';
951 ph10 289 here[len+1] = 0;
952     free(s);
953 ph10 287 }
954 ph10 289 else
955     #endif
956    
957 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
958 ph10 289
959 ph10 287 {
960 ph10 516 if (f == stdin) printf("%s", prompt);
961 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
962     return (here == start)? NULL : start;
963 ph10 289 }
964    
965 nigel 91 dlen = (int)strlen((char *)here);
966     if (dlen > 0 && here[dlen - 1] == '\n') return start;
967     here += dlen;
968     }
969    
970     else
971     {
972     int new_buffer_size = 2*buffer_size;
973 ph10 808 pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
974     pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
975     pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
976 nigel 91
977     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
978     {
979     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
980     exit(1);
981     }
982    
983     memcpy(new_buffer, buffer, buffer_size);
984     memcpy(new_pbuffer, pbuffer, buffer_size);
985    
986     buffer_size = new_buffer_size;
987    
988     start = new_buffer + (start - buffer);
989     here = new_buffer + (here - buffer);
990    
991     free(buffer);
992     free(dbuffer);
993     free(pbuffer);
994    
995     buffer = new_buffer;
996     dbuffer = new_dbuffer;
997     pbuffer = new_pbuffer;
998     }
999     }
1000    
1001     return NULL; /* Control never gets here */
1002     }
1003    
1004    
1005    
1006     /*************************************************
1007 nigel 63 * Read number from string *
1008     *************************************************/
1009    
1010     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1011     around with conditional compilation, just do the job by hand. It is only used
1012 nigel 93 for unpicking arguments, so just keep it simple.
1013 nigel 63
1014     Arguments:
1015     str string to be converted
1016     endptr where to put the end pointer
1017    
1018     Returns: the converted value, as an int
1019     */
1020    
1021     static int
1022 ph10 808 get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1023 nigel 63 {
1024     int result = 0;
1025     while(*str != 0 && isspace(*str)) str++;
1026     while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1027     *endptr = str;
1028     return(result);
1029     }
1030    
1031    
1032    
1033 ph10 808 #ifdef SUPPORT_PCRE8
1034 nigel 49 /*************************************************
1035 ph10 808 * Print 8-bit character string *
1036 nigel 49 *************************************************/
1037    
1038 ph10 808 /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1039     If handed a NULL file, just counts chars without printing. */
1040 nigel 49
1041 ph10 808 static int pchars(pcre_uint8 *p, int length, FILE *f)
1042 nigel 49 {
1043 nigel 85 int c = 0;
1044 nigel 63 int yield = 0;
1045 nigel 3
1046 nigel 63 while (length-- > 0)
1047 nigel 3 {
1048 nigel 79 #if !defined NOUTF8
1049 nigel 67 if (use_utf8)
1050 nigel 63 {
1051     int rc = utf82ord(p, &c);
1052 nigel 3
1053 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
1054     {
1055     length -= rc - 1;
1056     p += rc;
1057 ph10 808 if (PRINTOK(c))
1058 nigel 63 {
1059     if (f != NULL) fprintf(f, "%c", c);
1060     yield++;
1061     }
1062     else
1063     {
1064 nigel 93 int n = 4;
1065     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1066     yield += (n <= 0x000000ff)? 2 :
1067     (n <= 0x00000fff)? 3 :
1068     (n <= 0x0000ffff)? 4 :
1069     (n <= 0x000fffff)? 5 : 6;
1070 nigel 63 }
1071     continue;
1072     }
1073     }
1074 nigel 79 #endif
1075 nigel 3
1076 nigel 63 /* Not UTF-8, or malformed UTF-8 */
1077    
1078 nigel 93 c = *p++;
1079 ph10 808 if (PRINTOK(c))
1080 nigel 3 {
1081 nigel 63 if (f != NULL) fprintf(f, "%c", c);
1082     yield++;
1083 nigel 3 }
1084 nigel 63 else
1085 nigel 3 {
1086 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
1087     yield += 4;
1088     }
1089     }
1090 nigel 3
1091 nigel 63 return yield;
1092     }
1093 ph10 808 #endif
1094 nigel 23
1095 nigel 3
1096 nigel 23
1097 ph10 808 #ifdef SUPPORT_PCRE16
1098 nigel 63 /*************************************************
1099 ph10 808 * Print 16-bit character string *
1100     *************************************************/
1101    
1102     /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1103     If handed a NULL file, just counts chars without printing. */
1104    
1105     static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1106     {
1107     int yield = 0;
1108    
1109     while (length-- > 0)
1110     {
1111     int c = *p++ & 0xffff;
1112    
1113     #if !defined NOUTF8
1114     if (use_utf8 && c >= 0xD800 && c < 0xDC00 && length > 0)
1115     {
1116     int d = *p & 0xffff;
1117     if (d >= 0xDC00 && d < 0xDFFF)
1118     {
1119     c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1120     length--;
1121     p++;
1122     }
1123     }
1124     #endif
1125    
1126     if (PRINTOK(c))
1127     {
1128     if (f != NULL) fprintf(f, "%c", c);
1129     yield++;
1130     }
1131     else
1132     {
1133     yield += 4;
1134     if (c < 0x100)
1135     {
1136     if (f != NULL) fprintf(f, "\\x%02x", c);
1137     }
1138     else
1139     {
1140     if (f != NULL) fprintf(f, "\\x{%02x}", c);
1141     yield += (c <= 0x000000ff)? 2 :
1142     (c <= 0x00000fff)? 3 :
1143     (c <= 0x0000ffff)? 4 :
1144     (c <= 0x000fffff)? 5 : 6;
1145     }
1146     }
1147     }
1148    
1149     return yield;
1150     }
1151     #endif
1152    
1153    
1154    
1155     /*************************************************
1156 nigel 63 * Callout function *
1157     *************************************************/
1158 nigel 3
1159 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
1160     the match. Yield zero unless more callouts than the fail count, or the callout
1161     data is not zero. */
1162 nigel 3
1163 nigel 63 static int callout(pcre_callout_block *cb)
1164     {
1165     FILE *f = (first_callout | callout_extra)? outfile : NULL;
1166 nigel 75 int i, pre_start, post_start, subject_length;
1167 nigel 3
1168 nigel 63 if (callout_extra)
1169     {
1170     fprintf(f, "Callout %d: last capture = %d\n",
1171     cb->callout_number, cb->capture_last);
1172 nigel 3
1173 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
1174     {
1175     if (cb->offset_vector[i] < 0)
1176     fprintf(f, "%2d: <unset>\n", i/2);
1177     else
1178     {
1179     fprintf(f, "%2d: ", i/2);
1180 ph10 808 PCHARSV(cb->subject + cb->offset_vector[i],
1181 nigel 63 cb->offset_vector[i+1] - cb->offset_vector[i], f);
1182     fprintf(f, "\n");
1183     }
1184     }
1185     }
1186 nigel 3
1187 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
1188     datails. On subsequent calls in the same match, we use pchars just to find the
1189     printed lengths of the substrings. */
1190 nigel 3
1191 nigel 63 if (f != NULL) fprintf(f, "--->");
1192 nigel 3
1193 ph10 808 PCHARS(pre_start, cb->subject, cb->start_match, f);
1194     PCHARS(post_start, cb->subject + cb->start_match,
1195 nigel 63 cb->current_position - cb->start_match, f);
1196 nigel 3
1197 ph10 808 PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1198 nigel 75
1199 ph10 808 PCHARSV(cb->subject + cb->current_position,
1200 nigel 63 cb->subject_length - cb->current_position, f);
1201 nigel 3
1202 nigel 63 if (f != NULL) fprintf(f, "\n");
1203 nigel 9
1204 nigel 63 /* Always print appropriate indicators, with callout number if not already
1205 nigel 75 shown. For automatic callouts, show the pattern offset. */
1206 nigel 3
1207 nigel 75 if (cb->callout_number == 255)
1208     {
1209     fprintf(outfile, "%+3d ", cb->pattern_position);
1210     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
1211     }
1212     else
1213     {
1214     if (callout_extra) fprintf(outfile, " ");
1215     else fprintf(outfile, "%3d ", cb->callout_number);
1216     }
1217 nigel 3
1218 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
1219     fprintf(outfile, "^");
1220 nigel 3
1221 nigel 63 if (post_start > 0)
1222     {
1223     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
1224     fprintf(outfile, "^");
1225 nigel 3 }
1226    
1227 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
1228     fprintf(outfile, " ");
1229    
1230     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
1231     pbuffer + cb->pattern_position);
1232    
1233 nigel 63 fprintf(outfile, "\n");
1234     first_callout = 0;
1235 nigel 3
1236 ph10 654 if (cb->mark != last_callout_mark)
1237 ph10 645 {
1238 ph10 654 fprintf(outfile, "Latest Mark: %s\n",
1239 ph10 645 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1240 ph10 654 last_callout_mark = cb->mark;
1241     }
1242 ph10 645
1243 nigel 71 if (cb->callout_data != NULL)
1244 nigel 49 {
1245 nigel 71 int callout_data = *((int *)(cb->callout_data));
1246     if (callout_data != 0)
1247     {
1248     fprintf(outfile, "Callout data = %d\n", callout_data);
1249     return callout_data;
1250     }
1251 nigel 63 }
1252 nigel 49
1253 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
1254     (++callout_count >= callout_fail_count)? 1 : 0;
1255 nigel 3 }
1256    
1257    
1258 nigel 63 /*************************************************
1259 nigel 73 * Local malloc functions *
1260 nigel 63 *************************************************/
1261 nigel 3
1262 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1263 ph10 801 compiled re, which is the first store request that pcre_compile() makes. The
1264     show_malloc variable is set only during matching. */
1265 nigel 3
1266     static void *new_malloc(size_t size)
1267     {
1268 nigel 73 void *block = malloc(size);
1269 nigel 43 gotten_store = size;
1270 ph10 801 if (first_gotten_store == 0) first_gotten_store = size;
1271 nigel 73 if (show_malloc)
1272 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1273 nigel 73 return block;
1274 nigel 3 }
1275    
1276 nigel 73 static void new_free(void *block)
1277     {
1278     if (show_malloc)
1279     fprintf(outfile, "free %p\n", block);
1280     free(block);
1281     }
1282 nigel 3
1283 nigel 73 /* For recursion malloc/free, to test stacking calls */
1284    
1285     static void *stack_malloc(size_t size)
1286     {
1287     void *block = malloc(size);
1288     if (show_malloc)
1289 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1290 nigel 73 return block;
1291     }
1292    
1293     static void stack_free(void *block)
1294     {
1295     if (show_malloc)
1296     fprintf(outfile, "stack_free %p\n", block);
1297     free(block);
1298     }
1299    
1300    
1301 nigel 63 /*************************************************
1302     * Call pcre_fullinfo() *
1303     *************************************************/
1304 nigel 43
1305 ph10 808 /* Get one piece of information from the pcre_fullinfo() function. When only
1306     one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1307     value, but the code is defensive. */
1308 nigel 43
1309     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1310     {
1311     int rc;
1312 ph10 808
1313     if (use_pcre16)
1314     #ifdef SUPPORT_PCRE16
1315     rc = pcre16_fullinfo(re, study, option, ptr);
1316     #else
1317     rc = PCRE_ERROR_BADMODE;
1318     #endif
1319     else
1320     #ifdef SUPPORT_PCRE8
1321     rc = pcre_fullinfo(re, study, option, ptr);
1322     #else
1323     rc = PCRE_ERROR_BADMODE;
1324     #endif
1325    
1326     if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1327     use_pcre16? "16" : "", option);
1328 nigel 43 }
1329    
1330    
1331    
1332 nigel 63 /*************************************************
1333 nigel 75 * Byte flipping function *
1334     *************************************************/
1335    
/* Reverse the byte order of the low 2 bytes (n == 2) or the low 4 bytes (any
other n) of value. Bits above those bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1345    
1346    
1347    
1348    
1349     /*************************************************
1350 nigel 87 * Check match or recursion limit *
1351     *************************************************/
1352    
1353     static int
1354 ph10 756 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1355 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1356     int flag, unsigned long int *limit, int errnumber, const char *msg)
1357     {
1358     int count;
1359     int min = 0;
1360     int mid = 64;
1361     int max = -1;
1362    
1363     extra->flags |= flag;
1364    
1365     for (;;)
1366     {
1367     *limit = mid;
1368    
1369 ph10 808 PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1370 nigel 87 use_offsets, use_size_offsets);
1371    
1372     if (count == errnumber)
1373     {
1374     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1375     min = mid;
1376     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1377     }
1378    
1379     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1380     count == PCRE_ERROR_PARTIAL)
1381     {
1382     if (mid == min + 1)
1383     {
1384     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1385     break;
1386     }
1387     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1388     max = mid;
1389     mid = (min + mid)/2;
1390     }
1391     else break; /* Some other error */
1392     }
1393    
1394     extra->flags &= ~flag;
1395     return count;
1396     }
1397    
1398    
1399    
1400     /*************************************************
1401 ph10 227 * Case-independent strncmp() function *
1402     *************************************************/
1403    
1404     /*
1405     Arguments:
1406     s first string
1407     t second string
1408     n number of characters to compare
1409    
1410     Returns: < 0, = 0, or > 0, according to the comparison
1411     */
1412    
1413     static int
1414 ph10 756 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1415 ph10 227 {
1416     while (n--)
1417     {
1418     int c = tolower(*s++) - tolower(*t++);
1419     if (c) return c;
1420     }
1421     return 0;
1422     }
1423    
1424    
1425    
1426     /*************************************************
1427 nigel 91 * Check newline indicator *
1428     *************************************************/
1429    
1430 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1431     a message and return 0 if there is no match.
1432 nigel 91
1433     Arguments:
1434     p points after the leading '<'
1435     f file for error message
1436    
1437     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1438     */
1439    
1440     static int
1441 ph10 756 check_newline(pcre_uint8 *p, FILE *f)
1442 nigel 91 {
1443 ph10 756 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1444     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1445     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1446     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1447     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1448     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1449     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1450 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1451     return 0;
1452     }
1453    
1454    
1455    
1456     /*************************************************
1457 nigel 93 * Usage function *
1458     *************************************************/
1459    
/* Write the command-line summary to stdout. Lines for options that depend on
build-time settings appear only when the corresponding support is compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n", stdout);

#ifdef SUPPORT_PCRE16
fputs(" -16 use 16-bit interface\n", stdout);
#endif

fputs(" -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1497    
1498    
1499    
1500     /*************************************************
1501 nigel 63 * Main Program *
1502     *************************************************/
1503 nigel 43
1504 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1505     consist of a regular expression, in delimiters and optionally followed by
1506     options, followed by a set of test data, terminated by an empty line. */
1507    
1508     int main(int argc, char **argv)
1509     {
1510     FILE *infile = stdin;
1511     int options = 0;
1512     int study_options = 0;
1513 ph10 386 int default_find_match_limit = FALSE;
1514 nigel 3 int op = 1;
1515     int timeit = 0;
1516 nigel 93 int timeitm = 0;
1517 nigel 3 int showinfo = 0;
1518 nigel 31 int showstore = 0;
1519 ph10 667 int force_study = -1;
1520     int force_study_options = 0;
1521 nigel 87 int quiet = 0;
1522 nigel 53 int size_offsets = 45;
1523     int size_offsets_max;
1524 nigel 77 int *offsets = NULL;
1525 nigel 53 #if !defined NOPOSIX
1526 nigel 3 int posix = 0;
1527 nigel 53 #endif
1528 nigel 3 int debug = 0;
1529 nigel 11 int done = 0;
1530 nigel 77 int all_use_dfa = 0;
1531     int yield = 0;
1532 nigel 91 int stack_size;
1533 nigel 3
1534 ph10 667 pcre_jit_stack *jit_stack = NULL;
1535    
1536 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1537     that 1024 is plenty long enough for the few names we'll be testing. */
1538 nigel 69
1539 ph10 756 pcre_uchar copynames[1024];
1540     pcre_uchar getnames[1024];
1541 nigel 91
1542 ph10 756 pcre_uchar *copynamesptr;
1543     pcre_uchar *getnamesptr;
1544 nigel 91
1545 ph10 805 /* Get buffers from malloc() so that valgrind will check their misuse when
1546 ph10 808 debugging. They grow automatically when very long lines are read. The 16-bit
1547 ph10 805 buffer (buffer16) is obtained only if needed. */
1548 nigel 69
1549 ph10 756 buffer = (pcre_uint8 *)malloc(buffer_size);
1550     dbuffer = (pcre_uint8 *)malloc(buffer_size);
1551     pbuffer = (pcre_uint8 *)malloc(buffer_size);
1552 nigel 69
1553 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1554 nigel 3
1555 nigel 93 outfile = stdout;
1556    
1557     /* The following _setmode() stuff is some Windows magic that tells its runtime
1558     library to translate CRLF into a single LF character. At least, that's what
1559     I've been told: never having used Windows I take this all on trust. Originally
1560     it set 0x8000, but then I was advised that _O_BINARY was better. */
1561    
1562 nigel 75 #if defined(_WIN32) || defined(WIN32)
1563 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1564     #endif
1565 nigel 75
1566 nigel 3 /* Scan options */
1567    
1568     while (argc > 1 && argv[op][0] == '-')
1569     {
1570 ph10 808 pcre_uint8 *endptr;
1571 nigel 53
1572 ph10 808 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1573 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1574 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
1575 ph10 667 {
1576     force_study = 1;
1577     force_study_options = PCRE_STUDY_JIT_COMPILE;
1578 ph10 691 }
1579 ph10 808 #ifdef SUPPORT_PCRE16
1580     else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1581     #endif
1582    
1583 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1584 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1585 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1586     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1587 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1588 nigel 79 #if !defined NODFA
1589 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1590 nigel 79 #endif
1591 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1592 ph10 808 ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1593 nigel 65 *endptr == 0))
1594 nigel 53 {
1595     op++;
1596     argc--;
1597     }
1598 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1599     {
1600     int both = argv[op][2] == 0;
1601     int temp;
1602 ph10 808 if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1603 nigel 93 *endptr == 0))
1604     {
1605     timeitm = temp;
1606     op++;
1607     argc--;
1608     }
1609     else timeitm = LOOPREPEAT;
1610     if (both) timeit = timeitm;
1611     }
1612 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1613 ph10 808 ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1614 nigel 91 *endptr == 0))
1615     {
1616 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1617 nigel 91 printf("PCRE: -S not supported on this OS\n");
1618     exit(1);
1619     #else
1620     int rc;
1621     struct rlimit rlim;
1622     getrlimit(RLIMIT_STACK, &rlim);
1623     rlim.rlim_cur = stack_size * 1024 * 1024;
1624     rc = setrlimit(RLIMIT_STACK, &rlim);
1625     if (rc != 0)
1626     {
1627     printf("PCRE: setrlimit() failed with error %d\n", rc);
1628     exit(1);
1629     }
1630     op++;
1631     argc--;
1632     #endif
1633     }
1634 nigel 53 #if !defined NOPOSIX
1635 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1636 nigel 53 #endif
1637 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1638     {
1639     int rc;
1640 ph10 392 unsigned long int lrc;
1641 nigel 63 printf("PCRE version %s\n", pcre_version());
1642     printf("Compiled with\n");
1643 ph10 805
1644 ph10 808 /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1645     are set, either both UTFs are supported or both are not supported. */
1646    
1647 ph10 805 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1648     printf(" 8-bit and 16-bit support\n");
1649 nigel 63 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1650 ph10 808 if (rc)
1651     printf(" UTF-8 and UTF-16 support\n");
1652     else
1653     printf(" No UTF-8 or UTF-16 support\n");
1654 ph10 805 #elif defined SUPPORT_PCRE8
1655     printf(" 8-bit support only\n");
1656     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1657     printf(" %sUTF-8 support\n", rc? "" : "No ");
1658 ph10 808 #else
1659 ph10 805 printf(" 16-bit support only\n");
1660     (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1661     printf(" %sUTF-16 support\n", rc? "" : "No ");
1662 ph10 808 #endif
1663    
1664 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1665     printf(" %sUnicode properties support\n", rc? "" : "No ");
1666 ph10 667 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1667 ph10 674 if (rc)
1668 ph10 689 printf(" Just-in-time compiler support\n");
1669 ph10 674 else
1670     printf(" No just-in-time compiler support\n");
1671 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1672 ph10 391 /* Note that these values are always the ASCII values, even
1673 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1674 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1675     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1676 ph10 150 (rc == -2)? "ANYCRLF" :
1677 nigel 93 (rc == -1)? "ANY" : "???");
1678 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1679     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1680     "all Unicode newlines");
1681 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1682     printf(" Internal link size = %d\n", rc);
1683     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1684     printf(" POSIX malloc threshold = %d\n", rc);
1685 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1686     printf(" Default match limit = %ld\n", lrc);
1687     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1688     printf(" Default recursion depth limit = %ld\n", lrc);
1689 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1690     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1691 ph10 121 goto EXIT;
1692 nigel 63 }
1693 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1694     strcmp(argv[op], "--help") == 0)
1695     {
1696     usage();
1697     goto EXIT;
1698     }
1699 nigel 3 else
1700     {
1701 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1702 nigel 93 usage();
1703 nigel 77 yield = 1;
1704     goto EXIT;
1705 nigel 3 }
1706     op++;
1707     argc--;
1708     }
1709    
1710 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1711    
1712     size_offsets_max = size_offsets;
1713 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1714 nigel 53 if (offsets == NULL)
1715     {
1716     printf("** Failed to get %d bytes of memory for offsets vector\n",
1717 ph10 151 (int)(size_offsets_max * sizeof(int)));
1718 nigel 77 yield = 1;
1719     goto EXIT;
1720 nigel 53 }
1721    
1722 nigel 3 /* Sort out the input and output files */
1723    
1724     if (argc > 1)
1725     {
1726 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1727 nigel 3 if (infile == NULL)
1728     {
1729     printf("** Failed to open %s\n", argv[op]);
1730 nigel 77 yield = 1;
1731     goto EXIT;
1732 nigel 3 }
1733     }
1734    
1735     if (argc > 2)
1736     {
1737 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1738 nigel 3 if (outfile == NULL)
1739     {
1740     printf("** Failed to open %s\n", argv[op+1]);
1741 nigel 77 yield = 1;
1742     goto EXIT;
1743 nigel 3 }
1744     }
1745    
1746     /* Set alternative malloc function */
1747    
1748 ph10 805 #ifdef SUPPORT_PCRE8
1749 nigel 3 pcre_malloc = new_malloc;
1750 nigel 73 pcre_free = new_free;
1751     pcre_stack_malloc = stack_malloc;
1752     pcre_stack_free = stack_free;
1753 ph10 805 #endif
1754 nigel 3
1755 ph10 805 #ifdef SUPPORT_PCRE16
1756     pcre16_malloc = new_malloc;
1757     pcre16_free = new_free;
1758     pcre16_stack_malloc = stack_malloc;
1759     pcre16_stack_free = stack_free;
1760     #endif
1761    
1762 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1763 nigel 3
1764 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1765 nigel 3
1766     /* Main loop */
1767    
1768 nigel 11 while (!done)
1769 nigel 3 {
1770     pcre *re = NULL;
1771     pcre_extra *extra = NULL;
1772 nigel 37
1773     #if !defined NOPOSIX /* There are still compilers that require no indent */
1774 nigel 3 regex_t preg;
1775 nigel 45 int do_posix = 0;
1776 nigel 37 #endif
1777    
1778 nigel 7 const char *error;
1779 ph10 808 pcre_uint8 *markptr;
1780     pcre_uint8 *p, *pp, *ppp;
1781     pcre_uint8 *to_file = NULL;
1782     const pcre_uint8 *tables = NULL;
1783 nigel 75 unsigned long int true_size, true_study_size = 0;
1784     size_t size, regex_gotten_store;
1785 ph10 654 int do_allcaps = 0;
1786 ph10 512 int do_mark = 0;
1787 nigel 3 int do_study = 0;
1788 ph10 654 int no_force_study = 0;
1789 nigel 25 int do_debug = debug;
1790 nigel 35 int do_G = 0;
1791     int do_g = 0;
1792 nigel 25 int do_showinfo = showinfo;
1793 nigel 35 int do_showrest = 0;
1794 ph10 616 int do_showcaprest = 0;
1795 nigel 75 int do_flip = 0;
1796 nigel 93 int erroroffset, len, delimiter, poffset;
1797 nigel 3
1798 nigel 67 use_utf8 = 0;
1799 ph10 211 debug_lengths = 1;
1800 nigel 63
1801 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1802 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1803 nigel 63 fflush(outfile);
1804 nigel 3
1805     p = buffer;
1806     while (isspace(*p)) p++;
1807     if (*p == 0) continue;
1808    
1809 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1810 nigel 3
1811 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1812     {
1813 nigel 91 unsigned long int magic, get_options;
1814 ph10 756 pcre_uint8 sbuf[8];
1815 nigel 75 FILE *f;
1816    
1817     p++;
1818     pp = p + (int)strlen((char *)p);
1819     while (isspace(pp[-1])) pp--;
1820     *pp = 0;
1821    
1822     f = fopen((char *)p, "rb");
1823     if (f == NULL)
1824     {
1825     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1826     continue;
1827     }
1828    
1829     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1830    
1831     true_size =
1832     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1833     true_study_size =
1834     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1835    
1836     re = (real_pcre *)new_malloc(true_size);
1837 ph10 801 regex_gotten_store = first_gotten_store;
1838 nigel 75
1839     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1840    
1841     magic = ((real_pcre *)re)->magic_number;
1842     if (magic != MAGIC_NUMBER)
1843     {
1844     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1845     {
1846     do_flip = 1;
1847     }
1848     else
1849     {
1850     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1851     fclose(f);
1852     continue;
1853     }
1854     }
1855    
1856 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1857 nigel 75 do_flip? " (byte-inverted)" : "", p);
1858    
1859     /* Need to know if UTF-8 for printing data strings */
1860    
1861 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1862     use_utf8 = (get_options & PCRE_UTF8) != 0;
1863 nigel 75
1864 ph10 612 /* Now see if there is any following study data. */
1865 nigel 75
1866     if (true_study_size != 0)
1867     {
1868     pcre_study_data *psd;
1869    
1870     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1871     extra->flags = PCRE_EXTRA_STUDY_DATA;
1872    
1873     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1874     extra->study_data = psd;
1875    
1876     if (fread(psd, 1, true_study_size, f) != true_study_size)
1877     {
1878     FAIL_READ:
1879     fprintf(outfile, "Failed to read data from %s\n", p);
1880 zherczeg 809 if (extra != NULL)
1881     {
1882     PCRE_FREE_STUDY(extra);
1883     }
1884 nigel 75 if (re != NULL) new_free(re);
1885     fclose(f);
1886     continue;
1887     }
1888     fprintf(outfile, "Study data loaded from %s\n", p);
1889     do_study = 1; /* To get the data output if requested */
1890     }
1891     else fprintf(outfile, "No study data\n");
1892    
1893     fclose(f);
1894     goto SHOW_INFO;
1895     }
1896    
1897     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1898     the pattern; if it isn't complete, read more. */
1899    
1900 nigel 3 delimiter = *p++;
1901    
1902 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1903 nigel 3 {
1904 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1905 nigel 3 goto SKIP_DATA;
1906     }
1907    
1908     pp = p;
1909 ph10 530 poffset = (int)(p - buffer);
1910 nigel 3
1911     for(;;)
1912     {
1913 nigel 29 while (*pp != 0)
1914     {
1915     if (*pp == '\\' && pp[1] != 0) pp++;
1916     else if (*pp == delimiter) break;
1917     pp++;
1918     }
1919 nigel 3 if (*pp != 0) break;
1920 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1921 nigel 3 {
1922     fprintf(outfile, "** Unexpected EOF\n");
1923 nigel 11 done = 1;
1924     goto CONTINUE;
1925 nigel 3 }
1926 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1927 nigel 3 }
1928    
1929 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1930     pointer to the correct relative point in the buffer. */
1931    
1932     p = buffer + poffset;
1933    
1934 nigel 29 /* If the first character after the delimiter is backslash, make
1935     the pattern end with backslash. This is purely to provide a way
1936     of testing for the error message when a pattern ends with backslash. */
1937    
1938     if (pp[1] == '\\') *pp++ = '\\';
1939    
1940 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1941     for callouts. */
1942 nigel 3
1943     *pp++ = 0;
1944 nigel 75 strcpy((char *)pbuffer, (char *)p);
1945 nigel 3
1946     /* Look for options after final delimiter */
1947    
1948     options = 0;
1949 ph10 801 study_options = 0;
1950 nigel 31 log_store = showstore; /* default from command line */
1951    
1952 nigel 3 while (*pp != 0)
1953     {
1954     switch (*pp++)
1955     {
1956 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1957 nigel 35 case 'g': do_g = 1; break;
1958 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1959     case 'm': options |= PCRE_MULTILINE; break;
1960     case 's': options |= PCRE_DOTALL; break;
1961     case 'x': options |= PCRE_EXTENDED; break;
1962 nigel 25
1963 ph10 616 case '+':
1964 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1965 ph10 616 break;
1966 ph10 654
1967     case '=': do_allcaps = 1; break;
1968 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1969 nigel 93 case 'B': do_debug = 1; break;
1970 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1971 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1972 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1973 nigel 75 case 'F': do_flip = 1; break;
1974 nigel 35 case 'G': do_G = 1; break;
1975 nigel 25 case 'I': do_showinfo = 1; break;
1976 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1977 ph10 512 case 'K': do_mark = 1; break;
1978 nigel 31 case 'M': log_store = 1; break;
1979 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1980 nigel 37
1981     #if !defined NOPOSIX
1982 nigel 3 case 'P': do_posix = 1; break;
1983 nigel 37 #endif
1984    
1985 ph10 654 case 'S':
1986 ph10 691 if (do_study == 0)
1987 ph10 612 {
1988 ph10 691 do_study = 1;
1989 ph10 667 if (*pp == '+')
1990     {
1991     study_options |= PCRE_STUDY_JIT_COMPILE;
1992 ph10 691 pp++;
1993     }
1994     }
1995 ph10 667 else
1996     {
1997 ph10 612 do_study = 0;
1998     no_force_study = 1;
1999 ph10 654 }
2000 ph10 612 break;
2001    
2002 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
2003 ph10 535 case 'W': options |= PCRE_UCP; break;
2004 nigel 3 case 'X': options |= PCRE_EXTRA; break;
2005 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2006 ph10 126 case 'Z': debug_lengths = 0; break;
2007 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
2008 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
2009 ph10 545
2010 ph10 541 case 'T':
2011     switch (*pp++)
2012     {
2013     case '0': tables = tables0; break;
2014     case '1': tables = tables1; break;
2015 ph10 545
2016 ph10 541 case '\r':
2017     case '\n':
2018 ph10 545 case ' ':
2019     case 0:
2020 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
2021 ph10 545 goto SKIP_DATA;
2022    
2023     default:
2024 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2025 ph10 545 goto SKIP_DATA;
2026 ph10 541 }
2027 ph10 545 break;
2028 nigel 25
2029     case 'L':
2030     ppp = pp;
2031 nigel 93 /* The '\r' test here is so that it works on Windows. */
2032     /* The 0 (NUL) test is just in case this is an unterminated line. */
2033     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
2034 nigel 25 *ppp = 0;
2035     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
2036     {
2037     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
2038     goto SKIP_DATA;
2039     }
2040 nigel 93 locale_set = 1;
2041 nigel 25 tables = pcre_maketables();
2042     pp = ppp;
2043     break;
2044    
2045 nigel 75 case '>':
2046     to_file = pp;
2047     while (*pp != 0) pp++;
2048     while (isspace(pp[-1])) pp--;
2049     *pp = 0;
2050     break;
2051    
2052 nigel 91 case '<':
2053     {
2054 ph10 756 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2055 ph10 336 {
2056     options |= PCRE_JAVASCRIPT_COMPAT;
2057 ph10 345 pp += 3;
2058 ph10 336 }
2059     else
2060 ph10 345 {
2061 ph10 336 int x = check_newline(pp, outfile);
2062     if (x == 0) goto SKIP_DATA;
2063     options |= x;
2064     while (*pp++ != '>');
2065 ph10 345 }
2066 nigel 91 }
2067     break;
2068    
2069 nigel 77 case '\r': /* So that it works in Windows */
2070     case '\n':
2071     case ' ':
2072     break;
2073 nigel 75
2074 nigel 3 default:
2075     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
2076     goto SKIP_DATA;
2077     }
2078     }
2079    
2080 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
2081 nigel 25 timing, showing, or debugging options, nor the ability to pass over
2082 ph10 805 local character tables. Neither does it have 16-bit support. */
2083 nigel 3
2084 nigel 37 #if !defined NOPOSIX
2085 nigel 3 if (posix || do_posix)
2086     {
2087     int rc;
2088     int cflags = 0;
2089 nigel 75
2090 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
2091     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
2092 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2093 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2094     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2095 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2096 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2097 nigel 87
2098 ph10 801 first_gotten_store = 0;
2099 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
2100    
2101     /* Compilation failed; go back for another re, skipping to blank line
2102     if non-interactive. */
2103    
2104     if (rc != 0)
2105     {
2106 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2107 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
2108     goto SKIP_DATA;
2109     }
2110     }
2111    
2112     /* Handle compiling via the native interface */
2113    
2114     else
2115 nigel 37 #endif /* !defined NOPOSIX */
2116    
2117 nigel 3 {
2118 ph10 412 unsigned long int get_options;
2119 ph10 808
2120     /* In 16-bit mode, convert the input. */
2121    
2122 ph10 805 #ifdef SUPPORT_PCRE16
2123 ph10 808 if (use_pcre16)
2124     {
2125     (void)to16(p, options & PCRE_UTF8, (int)strlen((char *)p));
2126     p = (pcre_uint8 *)buffer16;
2127     }
2128 ph10 805 #endif
2129 ph10 416
2130 ph10 805 /* Compile many times when timing */
2131    
2132 nigel 93 if (timeit > 0)
2133 nigel 3 {
2134     register int i;
2135     clock_t time_taken;
2136     clock_t start_time = clock();
2137 nigel 93 for (i = 0; i < timeit; i++)
2138 nigel 3 {
2139 ph10 808 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2140 nigel 3 if (re != NULL) free(re);
2141     }
2142     time_taken = clock() - start_time;
2143 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
2144     (((double)time_taken * 1000.0) / (double)timeit) /
2145 nigel 63 (double)CLOCKS_PER_SEC);
2146 nigel 3 }
2147    
2148 ph10 801 first_gotten_store = 0;
2149 ph10 808 PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2150 nigel 3
2151     /* Compilation failed; go back for another re, skipping to blank line
2152     if non-interactive. */
2153    
2154     if (re == NULL)
2155     {
2156     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
2157     SKIP_DATA:
2158     if (infile != stdin)
2159     {
2160     for (;;)
2161     {
2162 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
2163 nigel 11 {
2164     done = 1;
2165     goto CONTINUE;
2166     }
2167 nigel 3 len = (int)strlen((char *)buffer);
2168     while (len > 0 && isspace(buffer[len-1])) len--;
2169     if (len == 0) break;
2170     }
2171     fprintf(outfile, "\n");
2172     }
2173 nigel 25 goto CONTINUE;
2174 nigel 3 }
2175 ph10 416
2176     /* Compilation succeeded. It is now possible to set the UTF-8 option from
2177     within the regex; check for this so that we know how to process the data
2178 ph10 412 lines. */
2179 ph10 416
2180 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2181     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
2182 nigel 3
2183 ph10 801 /* Extract the size for possible writing before possibly flipping it,
2184     and remember the store that was got. */
2185 nigel 3
2186 ph10 801 true_size = ((real_pcre *)re)->size;
2187     regex_gotten_store = first_gotten_store;
2188    
2189     /* Output code size information if requested */
2190    
2191 nigel 63 if (log_store)
2192     fprintf(outfile, "Memory allocation (code space): %d\n",
2193 ph10 801 (int)(first_gotten_store -
2194 nigel 63 sizeof(real_pcre) -
2195     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2196    
2197 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
2198 ph10 654 help with the matching, unless the pattern has the SS option, which
2199 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
2200     never sensible). */
2201 nigel 75
2202 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
2203 nigel 75 {
2204 nigel 93 if (timeit > 0)
2205 nigel 75 {
2206     register int i;
2207     clock_t time_taken;
2208     clock_t start_time = clock();
2209 nigel 93 for (i = 0; i < timeit; i++)
2210 ph10 805 {
2211 ph10 808 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2212     }
2213 nigel 75 time_taken = clock() - start_time;
2214 zherczeg 809 if (extra != NULL)
2215     {
2216     PCRE_FREE_STUDY(extra);
2217     }
2218 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
2219     (((double)time_taken * 1000.0) / (double)timeit) /
2220 nigel 75 (double)CLOCKS_PER_SEC);
2221     }
2222 ph10 808 PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2223 nigel 75 if (error != NULL)
2224     fprintf(outfile, "Failed to study: %s\n", error);
2225     else if (extra != NULL)
2226 ph10 801 {
2227 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2228 ph10 801 if (log_store)
2229     {
2230     size_t jitsize;
2231     new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2232     if (jitsize != 0)
2233     fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2234     }
2235     }
2236 nigel 75 }
2237 ph10 512
2238 ph10 510 /* If /K was present, we set up for handling MARK data. */
2239 ph10 512
2240 ph10 510 if (do_mark)
2241     {
2242     if (extra == NULL)
2243     {
2244     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2245     extra->flags = 0;
2246     }
2247 ph10 512 extra->mark = &markptr;
2248 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
2249 ph10 512 }
2250 nigel 75
2251     /* If the 'F' option was present, we flip the bytes of all the integer
2252     fields in the regex data block and the study block. This is to make it
2253     possible to test PCRE's handling of byte-flipped patterns, e.g. those
2254     compiled on a different architecture. */
2255    
2256     if (do_flip)
2257     {
2258     real_pcre *rre = (real_pcre *)re;
2259 ph10 259 rre->magic_number =
2260 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
2261 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
2262     rre->options = byteflip(rre->options, sizeof(rre->options));
2263 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2264 ph10 259 rre->top_bracket =
2265 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2266 ph10 259 rre->top_backref =
2267 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2268 zherczeg 774 rre->first_char =
2269     (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2270     rre->req_char =
2271     (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2272 ph10 255 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2273 nigel 75 sizeof(rre->name_table_offset));
2274 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2275 nigel 75 sizeof(rre->name_entry_size));
2276 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2277 ph10 255 sizeof(rre->name_count));
2278 nigel 75
2279     if (extra != NULL)
2280     {
2281     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2282     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2283 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2284     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2285 nigel 75 }
2286     }
2287    
2288 ph10 805 /* Extract and display information from the compiled data if required. */
2289 nigel 75
2290     SHOW_INFO:
2291    
2292 nigel 93 if (do_debug)
2293     {
2294     fprintf(outfile, "------------------------------------------------------------------\n");
2295 zherczeg 809 #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2296 ph10 805 if (use_pcre16)
2297     pcre16_printint(re, outfile, debug_lengths);
2298 ph10 808 else
2299 ph10 805 pcre_printint(re, outfile, debug_lengths);
2300 zherczeg 809 #elif defined SUPPORT_PCRE8
2301     pcre_printint(re, outfile, debug_lengths);
2302     #else
2303     pcre16_printint(re, outfile, debug_lengths);
2304     #endif
2305 nigel 93 }
2306 ph10 416
2307 ph10 412 /* We already have the options in get_options (see above) */
2308 nigel 93
2309 nigel 25 if (do_showinfo)
2310 nigel 3 {
2311 ph10 412 unsigned long int all_options;
2312 nigel 79 #if !defined NOINFOCHECK
2313 nigel 43 int old_first_char, old_options, old_count;
2314 nigel 79 #endif
2315 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
2316 ph10 227 hascrorlf;
2317 nigel 63 int nameentrysize, namecount;
2318 ph10 756 const pcre_uchar *nametable;
2319 nigel 3
2320 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
2321     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2322     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
2323 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
2324 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
2325 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
2326     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
2327 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
2328 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
2329     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2330 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2331 nigel 43
2332 ph10 805 /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2333     that it gives the same results as the new function. */
2334 ph10 808
2335 nigel 79 #if !defined NOINFOCHECK
2336 ph10 805 if (!use_pcre16)
2337 ph10 808 {
2338 ph10 805 old_count = pcre_info(re, &old_options, &old_first_char);
2339     if (count < 0) fprintf(outfile,
2340     "Error %d from pcre_info()\n", count);
2341     else
2342     {
2343     if (old_count != count) fprintf(outfile,
2344     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2345     old_count);
2346 ph10 808
2347 ph10 805 if (old_first_char != first_char) fprintf(outfile,
2348     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2349     first_char, old_first_char);
2350 ph10 808
2351 ph10 805 if (old_options != (int)get_options) fprintf(outfile,
2352     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2353     get_options, old_options);
2354     }
2355 ph10 808 }
2356 nigel 79 #endif
2357 nigel 43
2358 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
2359 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2360 nigel 77 (int)size, (int)regex_gotten_store);
2361 nigel 43
2362     fprintf(outfile, "Capturing subpattern count = %d\n", count);
2363     if (backrefmax > 0)
2364     fprintf(outfile, "Max back reference = %d\n", backrefmax);
2365 nigel 63
2366     if (namecount > 0)
2367     {
2368     fprintf(outfile, "Named capturing subpatterns:\n");
2369     while (namecount-- > 0)
2370     {
2371     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2372     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2373     GET2(nametable, 0));
2374     nametable += nameentrysize;
2375     }
2376     }
2377 ph10 172
2378 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2379 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2380 nigel 63
2381 nigel 75 all_options = ((real_pcre *)re)->options;
2382 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2383 nigel 75
2384 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
2385 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2386 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2387     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2388     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2389     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2390 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2391 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2392 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2393     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2394 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2395     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2396     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2397 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2398 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2399 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2400 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2401 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2402 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2403 ph10 172
2404 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2405 nigel 43
2406 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2407 nigel 91 {
2408     case PCRE_NEWLINE_CR:
2409     fprintf(outfile, "Forced newline sequence: CR\n");
2410     break;
2411 nigel 43
2412 nigel 91 case PCRE_NEWLINE_LF:
2413     fprintf(outfile, "Forced newline sequence: LF\n");
2414     break;
2415    
2416     case PCRE_NEWLINE_CRLF:
2417     fprintf(outfile, "Forced newline sequence: CRLF\n");
2418     break;
2419    
2420 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2421     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2422     break;
2423    
2424 nigel 93 case PCRE_NEWLINE_ANY:
2425     fprintf(outfile, "Forced newline sequence: ANY\n");
2426     break;
2427    
2428 nigel 91 default:
2429     break;
2430     }
2431    
2432 nigel 43 if (first_char == -1)
2433     {
2434 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2435 nigel 43 }
2436     else if (first_char < 0)
2437     {
2438     fprintf(outfile, "No first char\n");
2439     }
2440     else
2441     {
2442 zherczeg 774 const char *caseless =
2443     ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2444 nigel 63 "" : " (caseless)";
2445 zherczeg 774
2446 ph10 808 if (PRINTOK(first_char))
2447 zherczeg 774 fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2448 nigel 3 else
2449 zherczeg 774 fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2450 nigel 43 }
2451 nigel 37
2452 nigel 43 if (need_char < 0)
2453     {
2454     fprintf(outfile, "No need char\n");
2455 nigel 3 }
2456 nigel 43 else
2457     {
2458 zherczeg 774 const char *caseless =
2459     ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2460 nigel 63 "" : " (caseless)";
2461 zherczeg 774
2462 ph10 808 if (PRINTOK(need_char))
2463 zherczeg 774 fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2464 nigel 43 else
2465 zherczeg 774 fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2466 nigel 43 }
2467 nigel 75
2468     /* Don't output study size; at present it is in any case a fixed
2469     value, but it varies, depending on the computer architecture, and
2470     so messes up the test suite. (And with the /F option, it might be
2471 ph10 654 flipped.) If study was forced by an external -s, don't show this
2472 ph10 612 information unless -i or -d was also present. This means that, except
2473     when auto-callouts are involved, the output from runs with and without
2474     -s should be identical. */
2475 nigel 75
2476 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2477 nigel 75 {
2478     if (extra == NULL)
2479     fprintf(outfile, "Study returned NULL\n");
2480     else
2481     {
2482 ph10 756 pcre_uint8 *start_bits = NULL;
2483 ph10 455 int minlength;
2484 ph10 461
2485 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2486 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2487    
2488 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2489     if (start_bits == NULL)
2490 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2491 nigel 75 else
2492     {
2493     int i;
2494     int c = 24;
2495     fprintf(outfile, "Starting byte set: ");
2496     for (i = 0; i < 256; i++)
2497     {
2498     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2499     {
2500     if (c > 75)
2501     {
2502     fprintf(outfile, "\n ");
2503     c = 2;
2504     }
2505 ph10 808 if (PRINTOK(i) && i != ' ')
2506 nigel 75 {
2507     fprintf(outfile, "%c ", i);
2508     c += 2;
2509     }
2510     else
2511     {
2512     fprintf(outfile, "\\x%02x ", i);
2513     c += 5;
2514     }
2515     }
2516     }
2517     fprintf(outfile, "\n");
2518     }
2519     }
2520 ph10 691
2521 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
2522 ph10 691
2523 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2524     {
2525 ph10 691 int jit;
2526 ph10 667 new_info(re, extra, PCRE_INFO_JIT, &jit);
2527 ph10 691 if (jit)
2528     fprintf(outfile, "JIT study was successful\n");
2529     else
2530     #ifdef SUPPORT_JIT
2531     fprintf(outfile, "JIT study was not successful\n");
2532 ph10 667 #else
2533 ph10 691 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2534 ph10 667 #endif
2535 ph10 691 }
2536 nigel 75 }
2537 nigel 3 }
2538    
2539 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2540     that is all. The first 8 bytes of the file are the regex length and then
2541     the study length, in big-endian order. */
2542 nigel 3
2543 nigel 75 if (to_file != NULL)
2544 nigel 3 {
2545 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2546     if (f == NULL)
2547 nigel 3 {
2548 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2549 nigel 3 }
2550 nigel 75 else
2551     {
2552 ph10 756 pcre_uint8 sbuf[8];
2553     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2554     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2555     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2556     sbuf[3] = (pcre_uint8)((true_size) & 255);
2557 ph10 259
2558 ph10 756 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2559     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2560     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2561     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2562 nigel 3
2563 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2564     fwrite(re, 1, true_size, f) < true_size)
2565     {
2566     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2567     }
2568 nigel 3 else
2569     {
2570 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2571 ph10 654
2572 ph10 658 /* If there is study data, write it. */
2573 ph10 654
2574 nigel 75 if (extra != NULL)
2575 nigel 3 {
2576 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2577     true_study_size)
2578 nigel 3 {
2579 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2580     strerror(errno));
2581 nigel 3 }
2582 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2583 nigel 3 }
2584     }
2585 nigel 75 fclose(f);
2586 nigel 3 }
2587 nigel 77
2588     new_free(re);
2589 zherczeg 809 if (extra != NULL)
2590     {
2591     PCRE_FREE_STUDY(extra);
2592     }
2593 ph10 545 if (locale_set)
2594 ph10 541 {
2595     new_free((void *)tables);
2596     setlocale(LC_CTYPE, "C");
2597 ph10 545 locale_set = 0;
2598     }
2599 nigel 75 continue; /* With next regex */
2600 nigel 3 }
2601 nigel 75 } /* End of non-POSIX compile */
2602 nigel 3
2603     /* Read data lines and test them */
2604    
2605     for (;;)
2606     {
2607 ph10 756 pcre_uint8 *q;
2608     pcre_uint8 *bptr;
2609 nigel 57 int *use_offsets = offsets;
2610 nigel 53 int use_size_offsets = size_offsets;
2611 nigel 63 int callout_data = 0;
2612     int callout_data_set = 0;
2613 nigel 3 int count, c;
2614 nigel 29 int copystrings = 0;
2615 ph10 386 int find_match_limit = default_find_match_limit;
2616 nigel 29 int getstrings = 0;
2617     int getlist = 0;
2618 nigel 39 int gmatched = 0;
2619 nigel 35 int start_offset = 0;
2620 ph10 579 int start_offset_sign = 1;
2621 nigel 41 int g_notempty = 0;
2622 nigel 77 int use_dfa = 0;
2623 nigel 3
2624     options = 0;
2625    
2626 nigel 91 *copynames = 0;
2627     *getnames = 0;
2628    
2629     copynamesptr = copynames;
2630     getnamesptr = getnames;
2631    
2632 nigel 63 pcre_callout = callout;
2633     first_callout = 1;
2634 ph10 654 last_callout_mark = NULL;
2635 nigel 63 callout_extra = 0;
2636     callout_count = 0;
2637     callout_fail_count = 999999;
2638     callout_fail_id = -1;
2639 nigel 73 show_malloc = 0;
2640 nigel 63
2641 nigel 91 if (extra != NULL) extra->flags &=
2642     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2643    
2644     len = 0;
2645     for (;;)
2646 nigel 11 {
2647 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2648 nigel 91 {
2649 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2650     {
2651 ph10 545 fprintf(outfile, "\n");
2652 ph10 537 break;
2653 ph10 545 }
2654 nigel 91 done = 1;
2655     goto CONTINUE;
2656     }
2657     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2658     len = (int)strlen((char *)buffer);
2659     if (buffer[len-1] == '\n') break;
2660 nigel 11 }
2661 nigel 3
2662     while (len > 0 && isspace(buffer[len-1])) len--;
2663     buffer[len] = 0;
2664     if (len == 0) break;
2665    
2666     p = buffer;
2667     while (isspace(*p)) p++;
2668    
2669 ph10 147 bptr = q = dbuffer;
2670 nigel 3 while ((c = *p++) != 0)
2671     {
2672     int i = 0;
2673     int n = 0;
2674 nigel 63
2675 nigel 3 if (c == '\\') switch ((c = *p++))
2676     {
2677     case 'a': c = 7; break;
2678     case 'b': c = '\b'; break;
2679     case 'e': c = 27; break;
2680     case 'f': c = '\f'; break;
2681     case 'n': c = '\n'; break;
2682     case 'r': c = '\r'; break;
2683     case 't': c = '\t'; break;
2684     case 'v': c = '\v'; break;
2685    
2686     case '0': case '1': case '2': case '3':
2687     case '4': case '5': case '6': case '7':
2688     c -= '0';
2689     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2690     c = c * 8 + *p++ - '0';
2691 nigel 91
2692     #if !defined NOUTF8
2693     if (use_utf8 && c > 255)
2694     {
2695 ph10 808 pcre_uint8 buff8[8];
2696 nigel 91 int ii, utn;
2697     utn = ord2utf8(c, buff8);
2698     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2699     c = buff8[ii]; /* Last byte */
2700     }
2701     #endif
2702 nigel 3 break;
2703    
2704     case 'x':
2705 nigel 49
2706     /* Handle \x{..} specially - new Perl thing for utf8 */
2707    
2708 nigel 79 #if !defined NOUTF8
2709 nigel 49 if (*p == '{')
2710     {
2711 ph10 808 pcre_uint8 *pt = p;
2712 nigel 49 c = 0;
2713 ph10 738
2714 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2715     when isxdigit() is a macro that refers to its argument more than
2716     once. This is banned by the C Standard, but apparently happens in at
2717     least one MacOS environment. */
2718 ph10 738
2719 ph10 735 for (pt++; isxdigit(*pt); pt++)
2720 ph10 734 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2721 nigel 49 if (*pt == '}')
2722     {
2723 ph10 808 pcre_uint8 buff8[8];
2724 nigel 49 int ii, utn;
2725 ph10 355 if (use_utf8)
2726 ph10 358 {
2727 ph10 355 utn = ord2utf8(c, buff8);
2728     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2729     c = buff8[ii]; /* Last byte */
2730     }
2731     else
2732     {
2733 ph10 358 if (c > 255)
2734 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2735     "UTF-8 mode is not enabled.\n"
2736     "** Truncation will probably give the wrong result.\n", c);
2737 ph10 358 }
2738 nigel 49 p = pt + 1;
2739     break;
2740     }
2741     /* Not correct form; fall through */
2742     }
2743 nigel 79 #endif
2744 nigel 49
2745     /* Ordinary \x */
2746    
2747 nigel 3 c = 0;
2748     while (i++ < 2 && isxdigit(*p))
2749     {
2750 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2751 nigel 3 p++;
2752     }
2753     break;
2754    
2755 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2756 nigel 3 p--;
2757     continue;
2758    
2759 nigel 75 case '>':
2760 ph10 579 if (*p == '-')
2761 ph10 567 {
2762     start_offset_sign = -1;
2763     p++;
2764 ph10 579 }
2765 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2766 ph10 579 start_offset *= start_offset_sign;
2767 nigel 75 continue;
2768    
2769 nigel 3 case 'A': /* Option setting */
2770     options |= PCRE_ANCHORED;
2771     continue;
2772    
2773     case 'B':
2774     options |= PCRE_NOTBOL;
2775     continue;
2776    
2777 nigel 29 case 'C':
2778 nigel 63 if (isdigit(*p)) /* Set copy string */
2779     {
2780     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2781     copystrings |= 1 << n;
2782     }
2783     else if (isalnum(*p))
2784     {
2785 ph10 756 pcre_uchar *npp = copynamesptr;
2786 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2787 nigel 91 *npp++ = 0;
2788 nigel 67 *npp = 0;
2789 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2790 nigel 63 if (n < 0)
2791 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2792     copynamesptr = npp;
2793 nigel 63 }
2794     else if (*p == '+')
2795     {
2796     callout_extra = 1;
2797     p++;
2798     }
2799     else if (*p == '-')
2800     {
2801     pcre_callout = NULL;
2802     p++;
2803     }
2804     else if (*p == '!')
2805     {
2806     callout_fail_id = 0;
2807     p++;
2808     while(isdigit(*p))
2809     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2810     callout_fail_count = 0;
2811     if (*p == '!')
2812     {
2813     p++;
2814     while(isdigit(*p))
2815     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2816     }
2817     }
2818     else if (*p == '*')
2819     {
2820     int sign = 1;
2821     callout_data = 0;
2822     if (*(++p) == '-') { sign = -1; p++; }
2823     while(isdigit(*p))
2824     callout_data = callout_data * 10 + *p++ - '0';
2825     callout_data *= sign;
2826     callout_data_set = 1;
2827     }
2828 nigel 29 continue;
2829    
2830 nigel 79 #if !defined NODFA
2831 nigel 77 case 'D':
2832 nigel 79 #if !defined NOPOSIX
2833 nigel 77 if (posix || do_posix)
2834     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2835     else
2836 nigel 79 #endif
2837 nigel 77 use_dfa = 1;
2838     continue;
2839 ph10 553 #endif
2840 nigel 77
2841 ph10 553 #if !defined NODFA
2842 nigel 77 case 'F':
2843     options |= PCRE_DFA_SHORTEST;
2844     continue;
2845 nigel 79 #endif
2846 nigel 77
2847 nigel 29 case 'G':
2848 nigel 63 if (isdigit(*p))
2849     {
2850     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2851     getstrings |= 1 << n;
2852     }
2853     else if (isalnum(*p))
2854     {
2855 ph10 756 pcre_uchar *npp = getnamesptr;
2856 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2857 nigel 91 *npp++ = 0;
2858 nigel 67 *npp = 0;
2859 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2860 nigel 63 if (n < 0)
2861 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2862     getnamesptr = npp;
2863 nigel 63 }
2864 nigel 29 continue;
2865 ph10 691
2866 ph10 667 case 'J':
2867     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2868 ph10 691 if (extra != NULL
2869     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2870 ph10 667 && extra->executable_jit != NULL)
2871 ph10 691 {
2872 ph10 667 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2873     jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2874 ph10 675 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2875 ph10 691 }
2876 ph10 667 continue;
2877 nigel 29
2878     case 'L':
2879     getlist = 1;
2880     continue;
2881    
2882 nigel 63 case 'M':
2883     find_match_limit = 1;
2884     continue;
2885    
2886 nigel 37 case 'N':
2887 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2888     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2889 ph10 461 else
2890 ph10 442 options |= PCRE_NOTEMPTY;
2891 nigel 37 continue;
2892    
2893 nigel 3 case 'O':
2894     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2895 nigel 53 if (n > size_offsets_max)
2896     {
2897     size_offsets_max = n;
2898 nigel 57 free(offsets);
2899 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2900 nigel 53 if (offsets == NULL)
2901     {
2902     printf("** Failed to get %d bytes of memory for offsets vector\n",
2903 ph10 151 (int)(size_offsets_max * sizeof(int)));
2904 nigel 77 yield = 1;
2905     goto EXIT;
2906 nigel 53 }
2907     }
2908     use_size_offsets = n;
2909 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2910 nigel 3 continue;
2911    
2912 nigel 75 case 'P':
2913 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2914 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2915 nigel 75 continue;
2916    
2917 nigel 91 case 'Q':
2918     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2919     if (extra == NULL)
2920     {
2921     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2922     extra->flags = 0;
2923     }
2924     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2925     extra->match_limit_recursion = n;
2926     continue;
2927    
2928     case 'q':
2929     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2930     if (extra == NULL)
2931     {
2932     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2933     extra->flags = 0;
2934     }
2935     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2936     extra->match_limit = n;
2937     continue;
2938    
2939 nigel 79 #if !defined NODFA
2940 nigel 77 case 'R':
2941     options |= PCRE_DFA_RESTART;
2942     continue;
2943 nigel 79 #endif
2944 nigel 77
2945 nigel 73 case 'S':
2946     show_malloc = 1;
2947     continue;
2948 ph10 392
2949 ph10 389 case 'Y':
2950     options |= PCRE_NO_START_OPTIMIZE;
2951 ph10 392 continue;
2952 nigel 73
2953 nigel 3 case 'Z':
2954     options |= PCRE_NOTEOL;
2955     continue;
2956 nigel 71
2957     case '?':
2958     options |= PCRE_NO_UTF8_CHECK;
2959     continue;
2960 nigel 91
2961     case '<':
2962     {
2963     int x = check_newline(p, outfile);
2964     if (x == 0) goto NEXT_DATA;
2965     options |= x;
2966     while (*p++ != '>');
2967     }
2968     continue;
2969 nigel 3 }
2970 nigel 9 *q++ = c;
2971 nigel 3 }
2972 nigel 9 *q = 0;
2973 ph10 530 len = (int)(q - dbuffer);
2974 ph10 545
2975 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2976 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2977 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2978 ph10 371
2979 ph10 363 #if !defined NOPOSIX
2980     if (posix || do_posix)
2981     {
2982     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2983 ph10 371 bptr += buffer_size - len - 1;
2984 ph10 363 }
2985 ph10 371 else
2986     #endif
2987 ph10 363 {
2988     memmove(bptr + buffer_size - len, bptr, len);
2989 ph10 371 bptr += buffer_size - len;
2990     }
2991 nigel 3
2992 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2993     {
2994     printf("**Match limit not relevant for DFA matching: ignored\n");
2995     find_match_limit = 0;
2996     }
2997    
2998 nigel 3 /* Handle matching via the POSIX interface, which does not
2999 nigel 63 support timing or playing with the match limit or callout data. */
3000 nigel 3
3001 nigel 37 #if !defined NOPOSIX
3002 nigel 3 if (posix || do_posix)
3003     {
3004     int rc;
3005     int eflags = 0;
3006 nigel 63 regmatch_t *pmatch = NULL;
3007     if (use_size_offsets > 0)
3008 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3009 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3010     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3011 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3012 nigel 3
3013 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3014 nigel 3
3015     if (rc != 0)
3016     {
3017 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
3018 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
3019     }
3020 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
3021     != 0)
3022     {
3023     fprintf(outfile, "Matched with REG_NOSUB\n");
3024     }
3025 nigel 3 else
3026     {
3027 nigel 7 size_t i;
3028 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
3029 nigel 3 {
3030     if (pmatch[i].rm_so >= 0)
3031     {
3032 nigel 23 fprintf(outfile, "%2d: ", (int)i);
3033 ph10 808 PCHARSV(dbuffer + pmatch[i].rm_so,
3034 nigel 63 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3035 nigel 3 fprintf(outfile, "\n");
3036 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3037 nigel 35 {
3038 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
3039 ph10 808 PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3040 nigel 63 outfile);
3041 nigel 35 fprintf(outfile, "\n");
3042     }
3043 nigel 3 }
3044     }
3045     }
3046 nigel 53 free(pmatch);
3047 ph10 808 goto NEXT_DATA;
3048 nigel 3 }
3049    
3050 ph10 808 #endif /* !defined NOPOSIX */
3051    
3052 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
3053 nigel 3
3054 ph10 808 #ifdef SUPPORT_PCRE16
3055     if (use_pcre16)
3056     {
3057     len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3058     bptr = (pcre_uint8 *)buffer16;
3059     }
3060     #endif
3061 nigel 37
3062 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
3063 nigel 3 {
3064 ph10 512 markptr = NULL;
3065    
3066 nigel 93 if (timeitm > 0)
3067 nigel 3 {
3068     register int i;
3069     clock_t time_taken;
3070     clock_t start_time = clock();
3071 nigel 77
3072 nigel 79 #if !defined NODFA
3073 nigel 77 if (all_use_dfa || use_dfa)
3074     {
3075     int workspace[1000];
3076 nigel 93 for (i = 0; i < timeitm; i++)
3077 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3078 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
3079     sizeof(workspace)/sizeof(int));
3080     }
3081     else
3082 nigel 79 #endif
3083 nigel 77
3084 nigel 93 for (i = 0; i < timeitm; i++)
3085 ph10 808 {
3086     PCRE_EXEC(count, re, extra, bptr, len,
3087 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
3088 ph10 808 }
3089 nigel 3 time_taken = clock() - start_time;
3090 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
3091     (((double)time_taken * 1000.0) / (double)timeitm) /
3092 nigel 63 (double)CLOCKS_PER_SEC);
3093 nigel 3 }
3094    
3095 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
3096 nigel 87 varying limits in order to find the minimum value for the match limit and
3097 ph10 667 for the recursion limit. The match limits are relevant only to the normal
3098     running of pcre_exec(), so disable the JIT optimization. This makes it
3099     possible to run the same set of tests with and without JIT externally
3100     requested. */
3101 nigel 63
3102     if (find_match_limit)
3103     {
3104     if (extra == NULL)
3105     {
3106 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3107 nigel 63 extra->flags = 0;
3108     }
3109 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3110 ph10 691
3111 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
3112 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
3113     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
3114     PCRE_ERROR_MATCHLIMIT, "match()");
3115 nigel 63
3116 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
3117     options|g_notempty, use_offsets, use_size_offsets,
3118     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
3119     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
3120 nigel 63 }
3121    
3122     /* If callout_data is set, use the interface with additional data */
3123    
3124     else if (callout_data_set)
3125     {
3126     if (extra == NULL)
3127     {
3128 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3129 nigel 63 extra->flags = 0;
3130     }
3131     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3132 nigel 71 extra->callout_data = &callout_data;
3133 ph10 808 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3134 nigel 63 options | g_notempty, use_offsets, use_size_offsets);
3135     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3136     }
3137    
3138     /* The normal case is just to do the match once, with the default
3139     value of match_limit. */
3140    
3141 nigel 79 #if !defined NODFA
3142 nigel 77 else if (all_use_dfa || use_dfa)
3143     {
3144     int workspace[1000];
3145 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3146 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
3147     sizeof(workspace)/sizeof(int));
3148     if (count == 0)
3149     {
3150     fprintf(outfile, "Matched, but too many subsidiary matches\n");
3151     count = use_size_offsets/2;
3152     }
3153     }
3154 nigel 79 #endif
3155 nigel 77
3156 nigel 75 else
3157     {
3158 ph10 808 PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3159     options | g_notempty, use_offsets, use_size_offsets);
3160 nigel 77 if (count == 0)
3161     {
3162     fprintf(outfile, "Matched, but too many substrings\n");
3163     count = use_size_offsets/3;
3164     }
3165 nigel 75 }
3166 nigel 3
3167 nigel 39 /* Matched */
3168    
3169 nigel 3 if (count >= 0)
3170     {
3171 nigel 93 int i, maxcount;
3172    
3173     #if !defined NODFA
3174     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
3175     #endif
3176     maxcount = use_size_offsets/3;
3177    
3178     /* This is a check against a lunatic return value. */
3179    
3180     if (count > maxcount)
3181     {
3182     fprintf(outfile,
3183     "** PCRE error: returned count %d is too big for offset size %d\n",
3184     count, use_size_offsets);
3185     count = use_size_offsets/3;
3186     if (do_g || do_G)
3187     {
3188     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
3189     do_g = do_G = FALSE; /* Break g/G loop */
3190     }
3191     }
3192 ph10 654
3193 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
3194     unset ones at the end. */
3195 ph10 654
3196 ph10 626 if (do_allcaps)
3197     {
3198     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3199 ph10 654 count++; /* Allow for full match */
3200     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3201     }
3202 nigel 93
3203 ph10 626 /* Output the captured substrings */
3204 ph10 654
3205 nigel 29 for (i = 0; i < count * 2; i += 2)
3206 nigel 3 {
3207 nigel 57 if (use_offsets[i] < 0)
3208 ph10 654 {
3209 ph10 626 if (use_offsets[i] != -1)
3210     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3211 ph10 654 use_offsets[i], i);
3212 ph10 626 if (use_offsets[i+1] != -1)
3213     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3214 ph10 654 use_offsets[i+1], i+1);
3215 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
3216 ph10 654 }
3217 nigel 3 else
3218     {
3219     fprintf(outfile, "%2d: ", i/2);
3220 ph10 808 PCHARSV(bptr + use_offsets[i],
3221 nigel 63 use_offsets[i+1] - use_offsets[i], outfile);
3222 nigel 3 fprintf(outfile, "\n");
3223 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
3224 nigel 35 {
3225 ph10 616 fprintf(outfile, "%2d+ ", i/2);
3226 ph10 808 PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3227 ph10 616 outfile);
3228     fprintf(outfile, "\n");
3229 nigel 35 }
3230 nigel 3 }
3231     }
3232 ph10 512
3233 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3234 nigel 29
3235     for (i = 0; i < 32; i++)
3236     {
3237     if ((copystrings & (1 << i)) != 0)
3238     {
3239 nigel 91 char copybuffer[256];
3240 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
3241 nigel 37 i, copybuffer, sizeof(copybuffer));
3242 nigel 29 if (rc < 0)
3243     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3244     else
3245 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
3246 nigel 29 }
3247     }
3248    
3249 nigel 91 for (copynamesptr = copynames;
3250     *copynamesptr != 0;
3251     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
3252     {
3253     char copybuffer[256];
3254     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
3255     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
3256     if (rc < 0)
3257     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
3258     else
3259     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
3260     }
3261    
3262 nigel 29 for (i = 0; i < 32; i++)
3263     {
3264     if ((getstrings & (1 << i)) != 0)
3265     {
3266     const char *substring;
3267 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
3268 nigel 29 i, &substring);
3269     if (rc < 0)
3270     fprintf(outfile, "get substring %d failed %d\n", i, rc);
3271     else
3272     {
3273     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
3274 nigel 49 pcre_free_substring(substring);
3275 nigel 29 }
3276     }
3277     }
3278    
3279 nigel 91 for (getnamesptr = getnames;
3280     *getnamesptr != 0;
3281     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
3282     {
3283     const char *substring;
3284     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
3285     count, (char *)getnamesptr, &substring);
3286     if (rc < 0)
3287     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
3288     else
3289     {
3290     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
3291     pcre_free_substring(substring);
3292     }
3293     }
3294    
3295 nigel 29 if (getlist)
3296     {
3297     const char **stringlist;
3298 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
3299 nigel 29 &stringlist);
3300     if (rc < 0)
3301     fprintf(outfile, "get substring list failed %d\n", rc);
3302     else
3303     {
3304     for (i = 0; i < count; i++)
3305     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3306     if (stringlist[i] != NULL)
3307     fprintf(outfile, "string list not terminated by NULL\n");
3308 nigel 49 pcre_free_substring_list(stringlist);
3309 nigel 29 }
3310     }
3311 nigel 39 }
3312 nigel 29
3313 nigel 75 /* There was a partial match */
3314    
3315     else if (count == PCRE_ERROR_PARTIAL)
3316     {
3317 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
3318     else fprintf(outfile, "Partial match, mark=%s", markptr);
3319 ph10 426 if (use_size_offsets > 1)
3320     {
3321     fprintf(outfile, ": ");
3322 ph10 808 PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3323 ph10 461 outfile);
3324     }
3325 nigel 77 fprintf(outfile, "\n");
3326 nigel 75 break; /* Out of the /g loop */
3327     }
3328    
3329 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
3330 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
3331     to advance the start offset, and continue. We won't be at the end of the
3332     string - that was checked before setting g_notempty.
3333 nigel 39
3334 ph10 566 Complication arises in the case when the newline convention is "any",
3335 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
3336     terminated by CRLF, an advance of one character just passes the \r,
3337 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
3338 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
3339     newline setting in the pattern; if none was set, use pcre_config() to
3340 ph10 566 find the default.
3341 ph10 144
3342 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
3343     character, not one byte. */
3344    
3345 nigel 3 else
3346     {
3347 nigel 41 if (g_notempty != 0)
3348 nigel 35 {
3349 nigel 73 int onechar = 1;
3350 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
3351 nigel 57 use_offsets[0] = start_offset;
3352 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
3353     {
3354     int d;
3355     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3356 ph10 391 /* Note that these values are always the ASCII ones, even in
3357     EBCDIC environments. CR = 13, NL = 10. */
3358     obits = (d == 13)? PCRE_NEWLINE_CR :
3359     (d == 10)? PCRE_NEWLINE_LF :
3360     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3361 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3362 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
3363     }
3364 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3365 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3366 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3367 ph10 149 &&
3368 ph10 143 start_offset < len - 1 &&
3369     bptr[start_offset] == '\r' &&
3370     bptr[start_offset+1] == '\n')
3371 ph10 144 onechar++;
3372 ph10 143 else if (use_utf8)
3373 nigel 73 {
3374     while (start_offset + onechar < len)
3375     {
3376 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3377 ph10 579 onechar++;
3378 nigel 73 }
3379     }
3380     use_offsets[1] = start_offset + onechar;
3381 nigel 35 }
3382 nigel 41 else
3383     {
3384 ph10 598 switch(count)
3385 ph10 654 {
3386 ph10 598 case PCRE_ERROR_NOMATCH:
3387 ph10 512 if (gmatched == 0)
3388 ph10 510 {
3389     if (markptr == NULL) fprintf(outfile, "No match\n");
3390     else fprintf(outfile, "No match, mark = %s\n", markptr);
3391 ph10 512 }
3392 ph10 598 break;
3393 ph10 654
3394 ph10 598 case PCRE_ERROR_BADUTF8:
3395     case PCRE_ERROR_SHORTUTF8:
3396     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3397     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3398     if (use_size_offsets >= 2)
3399 ph10 654 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3400 ph10 598 use_offsets[1]);
3401 ph10 654 fprintf(outfile, "\n");
3402     break;
3403    
3404 ph10 598 default:
3405 ph10 654 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3406 ph10 604 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3407 ph10 654 else
3408     fprintf(outfile, "Error %d (Unexpected value)\n", count);
3409 ph10 598 break;
3410 nigel 41 }
3411 ph10 654
3412 nigel 41 break; /* Out of the /g loop */
3413     }
3414 nigel 3 }
3415 nigel 35
3416 nigel 39 /* If not /g or /G we are done */
3417    
3418     if (!do_g && !do_G) break;
3419    
3420 nigel 41 /* If we have matched an empty string, first check to see if we are at
3421 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3422     Perl's /g option does. This turns out to be rather cunning. First we set
3423     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3424 nigel 47 same point. If this fails (picked up above) we advance to the next
3425 ph10 143 character. */
3426 ph10 142
3427 nigel 41 g_notempty = 0;
3428 ph10 142
3429 nigel 57 if (use_offsets[0] == use_offsets[1])
3430 nigel 41 {
3431 nigel 57 if (use_offsets[0] == len) break;
3432 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3433 nigel 41 }
3434 nigel 39
3435     /* For /g, update the start offset, leaving the rest alone */
3436    
3437 ph10 143 if (do_g) start_offset = use_offsets[1];
3438 nigel 39
3439     /* For /G, update the pointer and length */
3440    
3441     else
3442 nigel 35 {
3443 ph10 143 bptr += use_offsets[1];
3444     len -= use_offsets[1];
3445 nigel 35 }
3446 nigel 39 } /* End of loop for /g and /G */
3447 nigel 91
3448     NEXT_DATA: continue;
3449 nigel 39 } /* End of loop for data lines */
3450 nigel 3
3451 nigel 11 CONTINUE:
3452 nigel 37
3453     #if !defined NOPOSIX
3454 nigel 3 if (posix || do_posix) regfree(&preg);
3455 nigel 37 #endif
3456    
3457 nigel 77 if (re != NULL) new_free(re);
3458 zherczeg 809 if (extra != NULL)
3459     {
3460     PCRE_FREE_STUDY(extra);
3461     }
3462 ph10 541 if (locale_set)
3463 nigel 25 {
3464 nigel 77 new_free((void *)tables);
3465 nigel 25 setlocale(LC_CTYPE, "C");
3466 nigel 93 locale_set = 0;
3467 nigel 25 }
3468 ph10 691 if (jit_stack != NULL)
3469 ph10 667 {
3470     pcre_jit_stack_free(jit_stack);
3471 ph10 691 jit_stack = NULL;
3472     }
3473 nigel 3 }
3474    
3475 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3476 nigel 77
3477     EXIT:
3478    
3479     if (infile != NULL && infile != stdin) fclose(infile);
3480     if (outfile != NULL && outfile != stdout) fclose(outfile);
3481    
3482     free(buffer);
3483     free(dbuffer);
3484     free(pbuffer);
3485     free(offsets);
3486    
3487 ph10 805 #ifdef SUPPORT_PCRE16
3488     if (buffer16 != NULL) free(buffer16);
3489     #endif
3490    
3491 nigel 77 return yield;
3492 nigel 3 }
3493    
3494 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12