/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 801 - (hide annotations) (download)
Mon Dec 12 16:23:37 2011 UTC (2 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 94524 byte(s)
Merge changes from trunk r755 to r800 into the 16-bit branch.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 ph10 667 #define _pcre_ucp_typerange ucp_typerange
116 nigel 85 #define _pcre_utf8_table1 utf8_table1
117     #define _pcre_utf8_table1_size utf8_table1_size
118     #define _pcre_utf8_table2 utf8_table2
119     #define _pcre_utf8_table3 utf8_table3
120     #define _pcre_utf8_table4 utf8_table4
121     #define _pcre_utt utt
122     #define _pcre_utt_size utt_size
123 ph10 240 #define _pcre_utt_names utt_names
124 nigel 85 #define _pcre_OP_lengths OP_lengths
125    
126     #include "pcre_tables.c"
127    
128     /* We also need the pcre_printint() function for printing out compiled
129     patterns. This function is in a separate file so that it can be included in
130 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
131 ph10 498 know which case is being compiled. */
132 nigel 85
133 ph10 498 #define COMPILING_PCRETEST
134     #include "pcre_printint.src"
135    
136     /* The definition of the macro PRINTABLE, which determines whether to print an
137 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
138 ph10 498 contained in the printint.src file. We use it here also, in cases when the
139     locale has not been explicitly changed, so as to get consistent output from
140     systems that differ in their output from isprint() even in the "C" locale. */
141 nigel 93
142     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
143 nigel 85
144 nigel 37 /* It is possible to compile this test program without including support for
145     testing the POSIX interface, though this is not available via the standard
146     Makefile. */
147    
148     #if !defined NOPOSIX
149 nigel 3 #include "pcreposix.h"
150 nigel 37 #endif
151 nigel 3
152 ph10 107 /* It is also possible, for the benefit of the version currently imported into
153     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
154     interface to the DFA matcher (NODFA), and without the doublecheck of the old
155     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
156     UTF8 support if PCRE is built without it. */
157 nigel 79
158 ph10 107 #ifndef SUPPORT_UTF8
159     #ifndef NOUTF8
160     #define NOUTF8
161     #endif
162     #endif
163 nigel 79
164 ph10 107
165 nigel 85 /* Other parameters */
166    
167 nigel 3 #ifndef CLOCKS_PER_SEC
168     #ifdef CLK_TCK
169     #define CLOCKS_PER_SEC CLK_TCK
170     #else
171     #define CLOCKS_PER_SEC 100
172     #endif
173     #endif
174    
175 nigel 93 /* This is the default loop count for timing. */
176    
177 nigel 75 #define LOOPREPEAT 500000
178 nigel 3
179 nigel 85 /* Static variables */
180    
181 nigel 3 static FILE *outfile;
182     static int log_store = 0;
183 nigel 63 static int callout_count;
184     static int callout_extra;
185     static int callout_fail_count;
186     static int callout_fail_id;
187 ph10 210 static int debug_lengths;
188 nigel 63 static int first_callout;
189 nigel 93 static int locale_set = 0;
190 nigel 73 static int show_malloc;
191 nigel 67 static int use_utf8;
192 nigel 43 static size_t gotten_store;
193 ph10 801 static size_t first_gotten_store = 0;
194 ph10 645 static const unsigned char *last_callout_mark = NULL;
195 nigel 3
196 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
197    
198     static int buffer_size = 50000;
199 ph10 756 static pcre_uint8 *buffer = NULL;
200     static pcre_uint8 *dbuffer = NULL;
201     static pcre_uint8 *pbuffer = NULL;
202 nigel 3
203 ph10 598 /* Textual explanations for runtime error codes */
204 nigel 75
205 ph10 598 static const char *errtexts[] = {
206     NULL, /* 0 is no error */
207     NULL, /* NOMATCH is handled specially */
208     "NULL argument passed",
209     "bad option value",
210     "magic number missing",
211     "unknown opcode - pattern overwritten?",
212     "no more memory",
213 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
214 ph10 598 "match limit exceeded",
215     "callout error code",
216     NULL, /* BADUTF8 is handled specially */
217     "bad UTF-8 offset",
218     NULL, /* PARTIAL is handled specially */
219     "not used - internal error",
220     "internal error - pattern overwritten?",
221     "bad count value",
222     "item unsupported for DFA matching",
223     "backreference condition or recursion test not supported for DFA matching",
224     "match limit not supported for DFA matching",
225     "workspace size exceeded in DFA matching",
226 ph10 654 "too much recursion for DFA matching",
227 ph10 598 "recursion limit exceeded",
228     "not used - internal error",
229     "invalid combination of newline options",
230     "bad offset value",
231 ph10 642 NULL, /* SHORTUTF8 is handled specially */
232 ph10 676 "nested recursion at the same subject position",
233 ph10 691 "JIT stack limit reached"
234 ph10 598 };
235    
236 ph10 654
237 ph10 541 /*************************************************
238     * Alternate character tables *
239     *************************************************/
240 nigel 49
241 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
242     using the default tables of the library. However, the T option can be used to
243     select alternate sets of tables, for different kinds of testing. Note also that
244 ph10 541 the L (locale) option also adjusts the tables. */
245    
246 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
247 ph10 541 only ASCII characters. */
248    
249     static const unsigned char tables0[] = {
250    
251     /* This table is a lower casing table. */
252    
253     0, 1, 2, 3, 4, 5, 6, 7,
254     8, 9, 10, 11, 12, 13, 14, 15,
255     16, 17, 18, 19, 20, 21, 22, 23,
256     24, 25, 26, 27, 28, 29, 30, 31,
257     32, 33, 34, 35, 36, 37, 38, 39,
258     40, 41, 42, 43, 44, 45, 46, 47,
259     48, 49, 50, 51, 52, 53, 54, 55,
260     56, 57, 58, 59, 60, 61, 62, 63,
261     64, 97, 98, 99,100,101,102,103,
262     104,105,106,107,108,109,110,111,
263     112,113,114,115,116,117,118,119,
264     120,121,122, 91, 92, 93, 94, 95,
265     96, 97, 98, 99,100,101,102,103,
266     104,105,106,107,108,109,110,111,
267     112,113,114,115,116,117,118,119,
268     120,121,122,123,124,125,126,127,
269     128,129,130,131,132,133,134,135,
270     136,137,138,139,140,141,142,143,
271     144,145,146,147,148,149,150,151,
272     152,153,154,155,156,157,158,159,
273     160,161,162,163,164,165,166,167,
274     168,169,170,171,172,173,174,175,
275     176,177,178,179,180,181,182,183,
276     184,185,186,187,188,189,190,191,
277     192,193,194,195,196,197,198,199,
278     200,201,202,203,204,205,206,207,
279     208,209,210,211,212,213,214,215,
280     216,217,218,219,220,221,222,223,
281     224,225,226,227,228,229,230,231,
282     232,233,234,235,236,237,238,239,
283     240,241,242,243,244,245,246,247,
284     248,249,250,251,252,253,254,255,
285    
286     /* This table is a case flipping table. */
287    
288     0, 1, 2, 3, 4, 5, 6, 7,
289     8, 9, 10, 11, 12, 13, 14, 15,
290     16, 17, 18, 19, 20, 21, 22, 23,
291     24, 25, 26, 27, 28, 29, 30, 31,
292     32, 33, 34, 35, 36, 37, 38, 39,
293     40, 41, 42, 43, 44, 45, 46, 47,
294     48, 49, 50, 51, 52, 53, 54, 55,
295     56, 57, 58, 59, 60, 61, 62, 63,
296     64, 97, 98, 99,100,101,102,103,
297     104,105,106,107,108,109,110,111,
298     112,113,114,115,116,117,118,119,
299     120,121,122, 91, 92, 93, 94, 95,
300     96, 65, 66, 67, 68, 69, 70, 71,
301     72, 73, 74, 75, 76, 77, 78, 79,
302     80, 81, 82, 83, 84, 85, 86, 87,
303     88, 89, 90,123,124,125,126,127,
304     128,129,130,131,132,133,134,135,
305     136,137,138,139,140,141,142,143,
306     144,145,146,147,148,149,150,151,
307     152,153,154,155,156,157,158,159,
308     160,161,162,163,164,165,166,167,
309     168,169,170,171,172,173,174,175,
310     176,177,178,179,180,181,182,183,
311     184,185,186,187,188,189,190,191,
312     192,193,194,195,196,197,198,199,
313     200,201,202,203,204,205,206,207,
314     208,209,210,211,212,213,214,215,
315     216,217,218,219,220,221,222,223,
316     224,225,226,227,228,229,230,231,
317     232,233,234,235,236,237,238,239,
318     240,241,242,243,244,245,246,247,
319     248,249,250,251,252,253,254,255,
320    
321     /* This table contains bit maps for various character classes. Each map is 32
322     bytes long and the bits run from the least significant end of each byte. The
323     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
324     graph, print, punct, and cntrl. Other classes are built from combinations. */
325    
326     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
327     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330    
331     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
332     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
335    
336     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
337     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
340    
341     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
342     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
345    
346     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
347     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
350    
351     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
352     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
353     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
355    
356     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
357     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
358     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
360    
361     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
362     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
363     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
365    
366     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
367     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
370    
371     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
372     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
373     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
375    
376     /* This table identifies various classes of character by individual bits:
377     0x01 white space character
378     0x02 letter
379     0x04 decimal digit
380     0x08 hexadecimal digit
381     0x10 alphanumeric or '_'
382     0x80 regular expression metacharacter or binary zero
383     */
384    
385     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
386     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
387     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
388     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
389     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
390     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
391     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
392     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
393     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
394     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
395     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
396     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
397     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
398     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
399     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
400     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
412     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
413     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
414     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
415     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
416     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
417    
418 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
419     be at least an approximation of ISO 8859. In particular, there are characters
420 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
421    
422     static const unsigned char tables1[] = {
423     0,1,2,3,4,5,6,7,
424     8,9,10,11,12,13,14,15,
425     16,17,18,19,20,21,22,23,
426     24,25,26,27,28,29,30,31,
427     32,33,34,35,36,37,38,39,
428     40,41,42,43,44,45,46,47,
429     48,49,50,51,52,53,54,55,
430     56,57,58,59,60,61,62,63,
431     64,97,98,99,100,101,102,103,
432     104,105,106,107,108,109,110,111,
433     112,113,114,115,116,117,118,119,
434     120,121,122,91,92,93,94,95,
435     96,97,98,99,100,101,102,103,
436     104,105,106,107,108,109,110,111,
437     112,113,114,115,116,117,118,119,
438     120,121,122,123,124,125,126,127,
439     128,129,130,131,132,133,134,135,
440     136,137,138,139,140,141,142,143,
441     144,145,146,147,148,149,150,151,
442     152,153,154,155,156,157,158,159,
443     160,161,162,163,164,165,166,167,
444     168,169,170,171,172,173,174,175,
445     176,177,178,179,180,181,182,183,
446     184,185,186,187,188,189,190,191,
447     224,225,226,227,228,229,230,231,
448     232,233,234,235,236,237,238,239,
449     240,241,242,243,244,245,246,215,
450     248,249,250,251,252,253,254,223,
451     224,225,226,227,228,229,230,231,
452     232,233,234,235,236,237,238,239,
453     240,241,242,243,244,245,246,247,
454     248,249,250,251,252,253,254,255,
455     0,1,2,3,4,5,6,7,
456     8,9,10,11,12,13,14,15,
457     16,17,18,19,20,21,22,23,
458     24,25,26,27,28,29,30,31,
459     32,33,34,35,36,37,38,39,
460     40,41,42,43,44,45,46,47,
461     48,49,50,51,52,53,54,55,
462     56,57,58,59,60,61,62,63,
463     64,97,98,99,100,101,102,103,
464     104,105,106,107,108,109,110,111,
465     112,113,114,115,116,117,118,119,
466     120,121,122,91,92,93,94,95,
467     96,65,66,67,68,69,70,71,
468     72,73,74,75,76,77,78,79,
469     80,81,82,83,84,85,86,87,
470     88,89,90,123,124,125,126,127,
471     128,129,130,131,132,133,134,135,
472     136,137,138,139,140,141,142,143,
473     144,145,146,147,148,149,150,151,
474     152,153,154,155,156,157,158,159,
475     160,161,162,163,164,165,166,167,
476     168,169,170,171,172,173,174,175,
477     176,177,178,179,180,181,182,183,
478     184,185,186,187,188,189,190,191,
479     224,225,226,227,228,229,230,231,
480     232,233,234,235,236,237,238,239,
481     240,241,242,243,244,245,246,215,
482     248,249,250,251,252,253,254,223,
483     192,193,194,195,196,197,198,199,
484     200,201,202,203,204,205,206,207,
485     208,209,210,211,212,213,214,247,
486     216,217,218,219,220,221,222,255,
487     0,62,0,0,1,0,0,0,
488     0,0,0,0,0,0,0,0,
489     32,0,0,0,1,0,0,0,
490     0,0,0,0,0,0,0,0,
491     0,0,0,0,0,0,255,3,
492     126,0,0,0,126,0,0,0,
493     0,0,0,0,0,0,0,0,
494     0,0,0,0,0,0,0,0,
495     0,0,0,0,0,0,255,3,
496     0,0,0,0,0,0,0,0,
497     0,0,0,0,0,0,12,2,
498     0,0,0,0,0,0,0,0,
499     0,0,0,0,0,0,0,0,
500     254,255,255,7,0,0,0,0,
501     0,0,0,0,0,0,0,0,
502     255,255,127,127,0,0,0,0,
503     0,0,0,0,0,0,0,0,
504     0,0,0,0,254,255,255,7,
505     0,0,0,0,0,4,32,4,
506     0,0,0,128,255,255,127,255,
507     0,0,0,0,0,0,255,3,
508     254,255,255,135,254,255,255,7,
509     0,0,0,0,0,4,44,6,
510     255,255,127,255,255,255,127,255,
511     0,0,0,0,254,255,255,255,
512     255,255,255,255,255,255,255,127,
513     0,0,0,0,254,255,255,255,
514     255,255,255,255,255,255,255,255,
515     0,2,0,0,255,255,255,255,
516     255,255,255,255,255,255,255,127,
517     0,0,0,0,255,255,255,255,
518     255,255,255,255,255,255,255,255,
519     0,0,0,0,254,255,0,252,
520     1,0,0,248,1,0,0,120,
521     0,0,0,0,254,255,255,255,
522     0,0,128,0,0,0,128,0,
523     255,255,255,255,0,0,0,0,
524     0,0,0,0,0,0,0,128,
525     255,255,255,255,0,0,0,0,
526     0,0,0,0,0,0,0,0,
527     128,0,0,0,0,0,0,0,
528     0,1,1,0,1,1,0,0,
529     0,0,0,0,0,0,0,0,
530     0,0,0,0,0,0,0,0,
531     1,0,0,0,128,0,0,0,
532     128,128,128,128,0,0,128,0,
533     28,28,28,28,28,28,28,28,
534     28,28,0,0,0,0,0,128,
535     0,26,26,26,26,26,26,18,
536     18,18,18,18,18,18,18,18,
537     18,18,18,18,18,18,18,18,
538     18,18,18,128,128,0,128,16,
539     0,26,26,26,26,26,26,18,
540     18,18,18,18,18,18,18,18,
541     18,18,18,18,18,18,18,18,
542     18,18,18,128,128,0,0,0,
543     0,0,0,0,0,1,0,0,
544     0,0,0,0,0,0,0,0,
545     0,0,0,0,0,0,0,0,
546     0,0,0,0,0,0,0,0,
547     1,0,0,0,0,0,0,0,
548     0,0,18,0,0,0,0,0,
549     0,0,20,20,0,18,0,0,
550     0,20,18,0,0,0,0,0,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,18,
553     18,18,18,18,18,18,18,0,
554     18,18,18,18,18,18,18,18,
555     18,18,18,18,18,18,18,18,
556     18,18,18,18,18,18,18,18,
557     18,18,18,18,18,18,18,0,
558     18,18,18,18,18,18,18,18
559     };
560    
561    
562    
563 ph10 558
564     #ifndef HAVE_STRERROR
565 nigel 49 /*************************************************
566 ph10 558 * Provide strerror() for non-ANSI libraries *
567     *************************************************/
568    
569     /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
570     in their libraries, but can provide the same facility by this simple
571     alternative function. */
572    
573     extern int sys_nerr;
574     extern char *sys_errlist[];
575    
576     char *
577     strerror(int n)
578     {
579     if (n < 0 || n >= sys_nerr) return "unknown error number";
580     return sys_errlist[n];
581     }
582     #endif /* HAVE_STRERROR */
583    
584    
585 ph10 667 /*************************************************
586     * JIT memory callback *
587     *************************************************/
588 ph10 558
589 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
590     {
591     return (pcre_jit_stack *)arg;
592     }
593 ph10 558
594 ph10 667
595 ph10 558 /*************************************************
596 nigel 91 * Read or extend an input line *
597     *************************************************/
598    
599     /* Input lines are read into buffer, but both patterns and data lines can be
600     continued over multiple input lines. In addition, if the buffer fills up, we
601     want to automatically expand it so as to be able to handle extremely large
602     lines that are needed for certain stress tests. When the input buffer is
603     expanded, the other two buffers must also be expanded likewise, and the
604     contents of pbuffer, which are a copy of the input for callouts, must be
605     preserved (for when expansion happens for a data line). This is not the most
606     optimal way of handling this, but hey, this is just a test program!
607    
608     Arguments:
609     f the file to read
610     start where in buffer to start (this *must* be within buffer)
611 ph10 287 prompt for stdin or readline()
612 nigel 91
613     Returns: pointer to the start of new data
614     could be a copy of start, or could be moved
615     NULL if no data read and EOF reached
616     */
617    
618 ph10 756 static pcre_uint8 *
619     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
620 nigel 91 {
621 ph10 756 pcre_uint8 *here = start;
622 nigel 91
623     for (;;)
624     {
625 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
626 nigel 93
627 nigel 91 if (rlen > 1000)
628     {
629     int dlen;
630 ph10 289
631 ph10 287 /* If libreadline support is required, use readline() to read a line if the
632     input is a terminal. Note that readline() removes the trailing newline, so
633     we must put it back again, to be compatible with fgets(). */
634 ph10 289
635 ph10 287 #ifdef SUPPORT_LIBREADLINE
636     if (isatty(fileno(f)))
637     {
638 ph10 289 size_t len;
639 ph10 287 char *s = readline(prompt);
640     if (s == NULL) return (here == start)? NULL : start;
641     len = strlen(s);
642 ph10 289 if (len > 0) add_history(s);
643 ph10 287 if (len > rlen - 1) len = rlen - 1;
644     memcpy(here, s, len);
645     here[len] = '\n';
646 ph10 289 here[len+1] = 0;
647     free(s);
648 ph10 287 }
649 ph10 289 else
650     #endif
651    
652 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
653 ph10 289
654 ph10 287 {
655 ph10 516 if (f == stdin) printf("%s", prompt);
656 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
657     return (here == start)? NULL : start;
658 ph10 289 }
659    
660 nigel 91 dlen = (int)strlen((char *)here);
661     if (dlen > 0 && here[dlen - 1] == '\n') return start;
662     here += dlen;
663     }
664    
665     else
666     {
667     int new_buffer_size = 2*buffer_size;
668 ph10 756 pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
669     pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
670     pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
671 nigel 91
672     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
673     {
674     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
675     exit(1);
676     }
677    
678     memcpy(new_buffer, buffer, buffer_size);
679     memcpy(new_pbuffer, pbuffer, buffer_size);
680    
681     buffer_size = new_buffer_size;
682    
683     start = new_buffer + (start - buffer);
684     here = new_buffer + (here - buffer);
685    
686     free(buffer);
687     free(dbuffer);
688     free(pbuffer);
689    
690     buffer = new_buffer;
691     dbuffer = new_dbuffer;
692     pbuffer = new_pbuffer;
693     }
694     }
695    
696     return NULL; /* Control never gets here */
697     }
698    
699    
700    
701    
702    
703    
704    
705     /*************************************************
706 nigel 63 * Read number from string *
707     *************************************************/
708    
709     /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
710     around with conditional compilation, just do the job by hand. It is only used
711 nigel 93 for unpicking arguments, so just keep it simple.
712 nigel 63
713     Arguments:
714     str string to be converted
715     endptr where to put the end pointer
716    
717     Returns: the value, as an int
718     */
719    
/* Skip leading white space, then convert a run of decimal digits to an int.
If the digits describe a value too large for an int, the result saturates at
INT_MAX instead of overflowing (signed overflow is undefined behaviour); all
the digits are still consumed. With no digits present the result is 0.

Arguments:
  str         string to be converted
  endptr      receives a pointer to the first character after the digits

Returns:      the converted value (0 .. INT_MAX)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX-digit)/10 */

  if (result > (INT_MAX - digit) / 10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
729    
730    
731    
732 nigel 49
733     /*************************************************
734     * Convert UTF-8 string to value *
735     *************************************************/
736    
737     /* This function takes one or more bytes that represents a UTF-8 character,
738     and returns the value of the character.
739    
740     Argument:
741 nigel 91 utf8bytes a pointer to the byte vector
742     vptr a pointer to an int to receive the value
743 nigel 49
744 nigel 91 Returns: > 0 => the number of bytes consumed
745     -6 to 0 => malformed UTF-8 character at offset = (-return)
746 nigel 49 */
747    
748 nigel 79 #if !defined NOUTF8
749    
750 nigel 67 static int
751 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
752 nigel 49 {
753 nigel 91 int c = *utf8bytes++;
754 nigel 49 int d = c;
755     int i, j, s;
756    
757     for (i = -1; i < 6; i++) /* i is number of additional bytes */
758     {
759     if ((d & 0x80) == 0) break;
760     d <<= 1;
761     }
762    
763     if (i == -1) { *vptr = c; return 1; } /* ascii character */
764     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
765    
766     /* i now has a value in the range 1-5 */
767    
768 nigel 59 s = 6*i;
769 nigel 85 d = (c & utf8_table3[i]) << s;
770 nigel 49
771     for (j = 0; j < i; j++)
772     {
773 nigel 91 c = *utf8bytes++;
774 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
775 nigel 59 s -= 6;
776 nigel 49 d |= (c & 0x3f) << s;
777     }
778    
779     /* Check that encoding was the correct unique one */
780    
781 nigel 85 for (j = 0; j < utf8_table1_size; j++)
782     if (d <= utf8_table1[j]) break;
783 nigel 49 if (j != i) return -(i+1);
784    
785     /* Valid value */
786    
787     *vptr = d;
788     return i+1;
789     }
790    
791 nigel 79 #endif
792 nigel 49
793    
794 nigel 79
795 nigel 63 /*************************************************
796 nigel 85 * Convert character value to UTF-8 *
797     *************************************************/
798    
799     /* This function takes an integer value in the range 0 - 0x7fffffff
800     and encodes it as a UTF-8 character in 1 to 6 bytes.
801    
802     Arguments:
803     cvalue the character value
804 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
805 nigel 85
806     Returns: number of bytes placed in the buffer
807     */
808    
809 nigel 93 #if !defined NOUTF8
810    
811 nigel 85 static int
812 ph10 756 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
813 nigel 85 {
814     register int i, j;
815     for (i = 0; i < utf8_table1_size; i++)
816     if (cvalue <= utf8_table1[i]) break;
817 nigel 91 utf8bytes += i;
818 nigel 85 for (j = i; j > 0; j--)
819     {
820 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
821 nigel 85 cvalue >>= 6;
822     }
823 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
824 nigel 85 return i + 1;
825     }
826    
827 nigel 93 #endif
828 nigel 85
829    
830 nigel 93
831 nigel 85 /*************************************************
832 nigel 63 * Print character string *
833     *************************************************/
834 nigel 49
835 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
836     mode. Yields number of characters printed. If handed a NULL file, just counts
837     chars without printing. */
838 nigel 49
839 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
840 nigel 3 {
841 nigel 85 int c = 0;
842 nigel 63 int yield = 0;
843 nigel 3
844 nigel 63 while (length-- > 0)
845 nigel 3 {
846 nigel 79 #if !defined NOUTF8
847 nigel 67 if (use_utf8)
848 nigel 63 {
849     int rc = utf82ord(p, &c);
850 nigel 3
851 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
852     {
853     length -= rc - 1;
854     p += rc;
855 nigel 93 if (PRINTHEX(c))
856 nigel 63 {
857     if (f != NULL) fprintf(f, "%c", c);
858     yield++;
859     }
860     else
861     {
862 nigel 93 int n = 4;
863     if (f != NULL) fprintf(f, "\\x{%02x}", c);
864     yield += (n <= 0x000000ff)? 2 :
865     (n <= 0x00000fff)? 3 :
866     (n <= 0x0000ffff)? 4 :
867     (n <= 0x000fffff)? 5 : 6;
868 nigel 63 }
869     continue;
870     }
871     }
872 nigel 79 #endif
873 nigel 3
874 nigel 63 /* Not UTF-8, or malformed UTF-8 */
875    
876 nigel 93 c = *p++;
877     if (PRINTHEX(c))
878 nigel 3 {
879 nigel 63 if (f != NULL) fprintf(f, "%c", c);
880     yield++;
881 nigel 3 }
882 nigel 63 else
883 nigel 3 {
884 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
885     yield += 4;
886     }
887     }
888 nigel 3
889 nigel 63 return yield;
890     }
891 nigel 23
892 nigel 3
893 nigel 23
894 nigel 63 /*************************************************
895     * Callout function *
896     *************************************************/
897 nigel 3
898 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
899     the match. Yield zero unless more callouts than the fail count, or the callout
900     data is not zero. */
901 nigel 3
902 nigel 63 static int callout(pcre_callout_block *cb)
903     {
904     FILE *f = (first_callout | callout_extra)? outfile : NULL;
905 nigel 75 int i, pre_start, post_start, subject_length;
906 nigel 3
907 nigel 63 if (callout_extra)
908     {
909     fprintf(f, "Callout %d: last capture = %d\n",
910     cb->callout_number, cb->capture_last);
911 nigel 3
912 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
913     {
914     if (cb->offset_vector[i] < 0)
915     fprintf(f, "%2d: <unset>\n", i/2);
916     else
917     {
918     fprintf(f, "%2d: ", i/2);
919     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
920     cb->offset_vector[i+1] - cb->offset_vector[i], f);
921     fprintf(f, "\n");
922     }
923     }
924     }
925 nigel 3
926 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
927     datails. On subsequent calls in the same match, we use pchars just to find the
928     printed lengths of the substrings. */
929 nigel 3
930 nigel 63 if (f != NULL) fprintf(f, "--->");
931 nigel 3
932 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
933     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
934     cb->current_position - cb->start_match, f);
935 nigel 3
936 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
937    
938 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
939     cb->subject_length - cb->current_position, f);
940 nigel 3
941 nigel 63 if (f != NULL) fprintf(f, "\n");
942 nigel 9
943 nigel 63 /* Always print appropriate indicators, with callout number if not already
944 nigel 75 shown. For automatic callouts, show the pattern offset. */
945 nigel 3
946 nigel 75 if (cb->callout_number == 255)
947     {
948     fprintf(outfile, "%+3d ", cb->pattern_position);
949     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
950     }
951     else
952     {
953     if (callout_extra) fprintf(outfile, " ");
954     else fprintf(outfile, "%3d ", cb->callout_number);
955     }
956 nigel 3
957 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
958     fprintf(outfile, "^");
959 nigel 3
960 nigel 63 if (post_start > 0)
961     {
962     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
963     fprintf(outfile, "^");
964 nigel 3 }
965    
966 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
967     fprintf(outfile, " ");
968    
969     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
970     pbuffer + cb->pattern_position);
971    
972 nigel 63 fprintf(outfile, "\n");
973     first_callout = 0;
974 nigel 3
975 ph10 654 if (cb->mark != last_callout_mark)
976 ph10 645 {
977 ph10 654 fprintf(outfile, "Latest Mark: %s\n",
978 ph10 645 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
979 ph10 654 last_callout_mark = cb->mark;
980     }
981 ph10 645
982 nigel 71 if (cb->callout_data != NULL)
983 nigel 49 {
984 nigel 71 int callout_data = *((int *)(cb->callout_data));
985     if (callout_data != 0)
986     {
987     fprintf(outfile, "Callout data = %d\n", callout_data);
988     return callout_data;
989     }
990 nigel 63 }
991 nigel 49
992 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
993     (++callout_count >= callout_fail_count)? 1 : 0;
994 nigel 3 }
995    
996    
997 nigel 63 /*************************************************
998 nigel 73 * Local malloc functions *
999 nigel 63 *************************************************/
1000 nigel 3
1001 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1002 ph10 801 compiled re, which is the first store request that pcre_compile() makes. The
1003     show_malloc variable is set only during matching. */
1004 nigel 3
1005     static void *new_malloc(size_t size)
1006     {
1007 nigel 73 void *block = malloc(size);
1008 nigel 43 gotten_store = size;
1009 ph10 801 if (first_gotten_store == 0) first_gotten_store = size;
1010 nigel 73 if (show_malloc)
1011 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1012 nigel 73 return block;
1013 nigel 3 }
1014    
1015 nigel 73 static void new_free(void *block)
1016     {
1017     if (show_malloc)
1018     fprintf(outfile, "free %p\n", block);
1019     free(block);
1020     }
1021 nigel 3
1022 nigel 73 /* For recursion malloc/free, to test stacking calls */
1023    
1024     static void *stack_malloc(size_t size)
1025     {
1026     void *block = malloc(size);
1027     if (show_malloc)
1028 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1029 nigel 73 return block;
1030     }
1031    
1032     static void stack_free(void *block)
1033     {
1034     if (show_malloc)
1035     fprintf(outfile, "stack_free %p\n", block);
1036     free(block);
1037     }
1038    
1039    
1040 nigel 63 /*************************************************
1041     * Call pcre_fullinfo() *
1042     *************************************************/
1043 nigel 43
1044     /* Get one piece of information from the pcre_fullinfo() function */
1045    
1046     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1047     {
1048     int rc;
1049     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1050     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1051     }
1052    
1053    
1054    
1055 nigel 63 /*************************************************
1056 nigel 75 * Byte flipping function *
1057     *************************************************/
1058    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value reverses the
           four low-order bytes

Returns:   the flipped value; bits above those taking part are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2 = (value >> 16) & 0xff;
unsigned long int b3 = (value >> 24) & 0xff;

if (n == 2)
  {
  return (b0 << 8) | b1;
  }

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1068    
1069    
1070    
1071    
1072     /*************************************************
1073 nigel 87 * Check match or recursion limit *
1074     *************************************************/
1075    
1076     static int
1077 ph10 756 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1078 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1079     int flag, unsigned long int *limit, int errnumber, const char *msg)
1080     {
1081     int count;
1082     int min = 0;
1083     int mid = 64;
1084     int max = -1;
1085    
1086     extra->flags |= flag;
1087    
1088     for (;;)
1089     {
1090     *limit = mid;
1091    
1092     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1093     use_offsets, use_size_offsets);
1094    
1095     if (count == errnumber)
1096     {
1097     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1098     min = mid;
1099     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1100     }
1101    
1102     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1103     count == PCRE_ERROR_PARTIAL)
1104     {
1105     if (mid == min + 1)
1106     {
1107     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1108     break;
1109     }
1110     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1111     max = mid;
1112     mid = (min + mid)/2;
1113     }
1114     else break; /* Some other error */
1115     }
1116    
1117     extra->flags &= ~flag;
1118     return count;
1119     }
1120    
1121    
1122    
1123     /*************************************************
1124 ph10 227 * Case-independent strncmp() function *
1125     *************************************************/
1126    
1127     /*
1128     Arguments:
1129     s first string
1130     t second string
1131     n number of characters to compare
1132    
1133     Returns: < 0, = 0, or > 0, according to the comparison
1134     */
1135    
1136     static int
1137 ph10 756 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1138 ph10 227 {
1139     while (n--)
1140     {
1141     int c = tolower(*s++) - tolower(*t++);
1142     if (c) return c;
1143     }
1144     return 0;
1145     }
1146    
1147    
1148    
1149     /*************************************************
1150 nigel 91 * Check newline indicator *
1151     *************************************************/
1152    
1153 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1154     a message and return 0 if there is no match.
1155 nigel 91
1156     Arguments:
1157     p points after the leading '<'
1158     f file for error message
1159    
1160     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1161     */
1162    
1163     static int
1164 ph10 756 check_newline(pcre_uint8 *p, FILE *f)
1165 nigel 91 {
1166 ph10 756 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1167     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1168     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1169     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1170     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1171     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1172     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1173 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1174     return 0;
1175     }
1176    
1177    
1178    
1179     /*************************************************
1180 nigel 93 * Usage function *
1181     *************************************************/
1182    
/* Write the command-line help text to stdout. Which lines appear depends on
SUPPORT_LIBREADLINE, NODFA and NOPOSIX. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1217    
1218    
1219    
1220     /*************************************************
1221 nigel 63 * Main Program *
1222     *************************************************/
1223 nigel 43
1224 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1225     consist of a regular expression, in delimiters and optionally followed by
1226     options, followed by a set of test data, terminated by an empty line. */
1227    
1228     int main(int argc, char **argv)
1229     {
1230     FILE *infile = stdin;
1231     int options = 0;
1232     int study_options = 0;
1233 ph10 386 int default_find_match_limit = FALSE;
1234 nigel 3 int op = 1;
1235     int timeit = 0;
1236 nigel 93 int timeitm = 0;
1237 nigel 3 int showinfo = 0;
1238 nigel 31 int showstore = 0;
1239 ph10 667 int force_study = -1;
1240     int force_study_options = 0;
1241 nigel 87 int quiet = 0;
1242 nigel 53 int size_offsets = 45;
1243     int size_offsets_max;
1244 nigel 77 int *offsets = NULL;
1245 nigel 53 #if !defined NOPOSIX
1246 nigel 3 int posix = 0;
1247 nigel 53 #endif
1248 nigel 3 int debug = 0;
1249 nigel 11 int done = 0;
1250 nigel 77 int all_use_dfa = 0;
1251     int yield = 0;
1252 nigel 91 int stack_size;
1253 nigel 3
1254 ph10 667 pcre_jit_stack *jit_stack = NULL;
1255    
1256    
1257 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1258     that 1024 is plenty long enough for the few names we'll be testing. */
1259 nigel 69
1260 ph10 756 pcre_uchar copynames[1024];
1261     pcre_uchar getnames[1024];
1262 nigel 91
1263 ph10 756 pcre_uchar *copynamesptr;
1264     pcre_uchar *getnamesptr;
1265 nigel 91
1266 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1267 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1268 nigel 69
1269 ph10 756 buffer = (pcre_uint8 *)malloc(buffer_size);
1270     dbuffer = (pcre_uint8 *)malloc(buffer_size);
1271     pbuffer = (pcre_uint8 *)malloc(buffer_size);
1272 nigel 69
1273 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1274 nigel 3
1275 nigel 93 outfile = stdout;
1276    
1277     /* The following _setmode() stuff is some Windows magic that tells its runtime
1278     library to translate CRLF into a single LF character. At least, that's what
1279     I've been told: never having used Windows I take this all on trust. Originally
1280     it set 0x8000, but then I was advised that _O_BINARY was better. */
1281    
1282 nigel 75 #if defined(_WIN32) || defined(WIN32)
1283 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1284     #endif
1285 nigel 75
1286 nigel 3 /* Scan options */
1287    
1288     while (argc > 1 && argv[op][0] == '-')
1289     {
1290 nigel 63 unsigned char *endptr;
1291 nigel 53
1292 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1293 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1294 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
1295 ph10 667 {
1296     force_study = 1;
1297     force_study_options = PCRE_STUDY_JIT_COMPILE;
1298 ph10 691 }
1299 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1300 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1301 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1302     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1303 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1304 nigel 79 #if !defined NODFA
1305 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1306 nigel 79 #endif
1307 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1308 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1309     *endptr == 0))
1310 nigel 53 {
1311     op++;
1312     argc--;
1313     }
1314 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1315     {
1316     int both = argv[op][2] == 0;
1317     int temp;
1318     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1319     *endptr == 0))
1320     {
1321     timeitm = temp;
1322     op++;
1323     argc--;
1324     }
1325     else timeitm = LOOPREPEAT;
1326     if (both) timeit = timeitm;
1327     }
1328 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1329     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1330     *endptr == 0))
1331     {
1332 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1333 nigel 91 printf("PCRE: -S not supported on this OS\n");
1334     exit(1);
1335     #else
1336     int rc;
1337     struct rlimit rlim;
1338     getrlimit(RLIMIT_STACK, &rlim);
1339     rlim.rlim_cur = stack_size * 1024 * 1024;
1340     rc = setrlimit(RLIMIT_STACK, &rlim);
1341     if (rc != 0)
1342     {
1343     printf("PCRE: setrlimit() failed with error %d\n", rc);
1344     exit(1);
1345     }
1346     op++;
1347     argc--;
1348     #endif
1349     }
1350 nigel 53 #if !defined NOPOSIX
1351 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1352 nigel 53 #endif
1353 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1354     {
1355     int rc;
1356 ph10 392 unsigned long int lrc;
1357 nigel 63 printf("PCRE version %s\n", pcre_version());
1358     printf("Compiled with\n");
1359     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1360     printf(" %sUTF-8 support\n", rc? "" : "No ");
1361 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1362     printf(" %sUnicode properties support\n", rc? "" : "No ");
1363 ph10 667 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1364 ph10 674 if (rc)
1365 ph10 689 printf(" Just-in-time compiler support\n");
1366 ph10 674 else
1367     printf(" No just-in-time compiler support\n");
1368 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1369 ph10 391 /* Note that these values are always the ASCII values, even
1370 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1371 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1372     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1373 ph10 150 (rc == -2)? "ANYCRLF" :
1374 nigel 93 (rc == -1)? "ANY" : "???");
1375 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1376     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1377     "all Unicode newlines");
1378 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1379     printf(" Internal link size = %d\n", rc);
1380     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1381     printf(" POSIX malloc threshold = %d\n", rc);
1382 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1383     printf(" Default match limit = %ld\n", lrc);
1384     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1385     printf(" Default recursion depth limit = %ld\n", lrc);
1386 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1387     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1388 ph10 121 goto EXIT;
1389 nigel 63 }
1390 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1391     strcmp(argv[op], "--help") == 0)
1392     {
1393     usage();
1394     goto EXIT;
1395     }
1396 nigel 3 else
1397     {
1398 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1399 nigel 93 usage();
1400 nigel 77 yield = 1;
1401     goto EXIT;
1402 nigel 3 }
1403     op++;
1404     argc--;
1405     }
1406    
1407 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1408    
1409     size_offsets_max = size_offsets;
1410 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1411 nigel 53 if (offsets == NULL)
1412     {
1413     printf("** Failed to get %d bytes of memory for offsets vector\n",
1414 ph10 151 (int)(size_offsets_max * sizeof(int)));
1415 nigel 77 yield = 1;
1416     goto EXIT;
1417 nigel 53 }
1418    
1419 nigel 3 /* Sort out the input and output files */
1420    
1421     if (argc > 1)
1422     {
1423 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1424 nigel 3 if (infile == NULL)
1425     {
1426     printf("** Failed to open %s\n", argv[op]);
1427 nigel 77 yield = 1;
1428     goto EXIT;
1429 nigel 3 }
1430     }
1431    
1432     if (argc > 2)
1433     {
1434 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1435 nigel 3 if (outfile == NULL)
1436     {
1437     printf("** Failed to open %s\n", argv[op+1]);
1438 nigel 77 yield = 1;
1439     goto EXIT;
1440 nigel 3 }
1441     }
1442    
1443     /* Set alternative malloc function */
1444    
1445     pcre_malloc = new_malloc;
1446 nigel 73 pcre_free = new_free;
1447     pcre_stack_malloc = stack_malloc;
1448     pcre_stack_free = stack_free;
1449 nigel 3
1450 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1451 nigel 3
1452 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1453 nigel 3
1454     /* Main loop */
1455    
1456 nigel 11 while (!done)
1457 nigel 3 {
1458     pcre *re = NULL;
1459     pcre_extra *extra = NULL;
1460 nigel 37
1461     #if !defined NOPOSIX /* There are still compilers that require no indent */
1462 nigel 3 regex_t preg;
1463 nigel 45 int do_posix = 0;
1464 nigel 37 #endif
1465    
1466 nigel 7 const char *error;
1467 ph10 512 unsigned char *markptr;
1468 nigel 25 unsigned char *p, *pp, *ppp;
1469 nigel 75 unsigned char *to_file = NULL;
1470 nigel 53 const unsigned char *tables = NULL;
1471 nigel 75 unsigned long int true_size, true_study_size = 0;
1472     size_t size, regex_gotten_store;
1473 ph10 654 int do_allcaps = 0;
1474 ph10 512 int do_mark = 0;
1475 nigel 3 int do_study = 0;
1476 ph10 654 int no_force_study = 0;
1477 nigel 25 int do_debug = debug;
1478 nigel 35 int do_G = 0;
1479     int do_g = 0;
1480 nigel 25 int do_showinfo = showinfo;
1481 nigel 35 int do_showrest = 0;
1482 ph10 616 int do_showcaprest = 0;
1483 nigel 75 int do_flip = 0;
1484 nigel 93 int erroroffset, len, delimiter, poffset;
1485 nigel 3
1486 nigel 67 use_utf8 = 0;
1487 ph10 211 debug_lengths = 1;
1488 nigel 63
1489 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1490 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1491 nigel 63 fflush(outfile);
1492 nigel 3
1493     p = buffer;
1494     while (isspace(*p)) p++;
1495     if (*p == 0) continue;
1496    
1497 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1498 nigel 3
1499 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1500     {
1501 nigel 91 unsigned long int magic, get_options;
1502 ph10 756 pcre_uint8 sbuf[8];
1503 nigel 75 FILE *f;
1504    
1505     p++;
1506     pp = p + (int)strlen((char *)p);
1507     while (isspace(pp[-1])) pp--;
1508     *pp = 0;
1509    
1510     f = fopen((char *)p, "rb");
1511     if (f == NULL)
1512     {
1513     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1514     continue;
1515     }
1516    
1517     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1518    
1519     true_size =
1520     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1521     true_study_size =
1522     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1523    
1524     re = (real_pcre *)new_malloc(true_size);
1525 ph10 801 regex_gotten_store = first_gotten_store;
1526 nigel 75
1527     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1528    
1529     magic = ((real_pcre *)re)->magic_number;
1530     if (magic != MAGIC_NUMBER)
1531     {
1532     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1533     {
1534     do_flip = 1;
1535     }
1536     else
1537     {
1538     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1539     fclose(f);
1540     continue;
1541     }
1542     }
1543    
1544 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1545 nigel 75 do_flip? " (byte-inverted)" : "", p);
1546    
1547     /* Need to know if UTF-8 for printing data strings */
1548    
1549 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1550     use_utf8 = (get_options & PCRE_UTF8) != 0;
1551 nigel 75
1552 ph10 612 /* Now see if there is any following study data. */
1553 nigel 75
1554     if (true_study_size != 0)
1555     {
1556     pcre_study_data *psd;
1557    
1558     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1559     extra->flags = PCRE_EXTRA_STUDY_DATA;
1560    
1561     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1562     extra->study_data = psd;
1563    
1564     if (fread(psd, 1, true_study_size, f) != true_study_size)
1565     {
1566     FAIL_READ:
1567     fprintf(outfile, "Failed to read data from %s\n", p);
1568 ph10 667 if (extra != NULL) pcre_free_study(extra);
1569 nigel 75 if (re != NULL) new_free(re);
1570     fclose(f);
1571     continue;
1572     }
1573     fprintf(outfile, "Study data loaded from %s\n", p);
1574     do_study = 1; /* To get the data output if requested */
1575     }
1576     else fprintf(outfile, "No study data\n");
1577    
1578     fclose(f);
1579     goto SHOW_INFO;
1580     }
1581    
1582     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1583     the pattern; if it isn't complete, read more. */
1584    
1585 nigel 3 delimiter = *p++;
1586    
1587 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1588 nigel 3 {
1589 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1590 nigel 3 goto SKIP_DATA;
1591     }
1592    
1593     pp = p;
1594 ph10 530 poffset = (int)(p - buffer);
1595 nigel 3
1596     for(;;)
1597     {
1598 nigel 29 while (*pp != 0)
1599     {
1600     if (*pp == '\\' && pp[1] != 0) pp++;
1601     else if (*pp == delimiter) break;
1602     pp++;
1603     }
1604 nigel 3 if (*pp != 0) break;
1605 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1606 nigel 3 {
1607     fprintf(outfile, "** Unexpected EOF\n");
1608 nigel 11 done = 1;
1609     goto CONTINUE;
1610 nigel 3 }
1611 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1612 nigel 3 }
1613    
1614 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1615     pointer to the correct relative point in the buffer. */
1616    
1617     p = buffer + poffset;
1618    
1619 nigel 29 /* If the first character after the delimiter is backslash, make
1620     the pattern end with backslash. This is purely to provide a way
1621     of testing for the error message when a pattern ends with backslash. */
1622    
1623     if (pp[1] == '\\') *pp++ = '\\';
1624    
1625 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1626     for callouts. */
1627 nigel 3
1628     *pp++ = 0;
1629 nigel 75 strcpy((char *)pbuffer, (char *)p);
1630 nigel 3
1631     /* Look for options after final delimiter */
1632    
1633     options = 0;
1634 ph10 801 study_options = 0;
1635 nigel 31 log_store = showstore; /* default from command line */
1636    
1637 nigel 3 while (*pp != 0)
1638     {
1639     switch (*pp++)
1640     {
1641 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1642 nigel 35 case 'g': do_g = 1; break;
1643 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1644     case 'm': options |= PCRE_MULTILINE; break;
1645     case 's': options |= PCRE_DOTALL; break;
1646     case 'x': options |= PCRE_EXTENDED; break;
1647 nigel 25
1648 ph10 616 case '+':
1649 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1650 ph10 616 break;
1651 ph10 654
1652     case '=': do_allcaps = 1; break;
1653 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1654 nigel 93 case 'B': do_debug = 1; break;
1655 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1656 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1657 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1658 nigel 75 case 'F': do_flip = 1; break;
1659 nigel 35 case 'G': do_G = 1; break;
1660 nigel 25 case 'I': do_showinfo = 1; break;
1661 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1662 ph10 512 case 'K': do_mark = 1; break;
1663 nigel 31 case 'M': log_store = 1; break;
1664 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1665 nigel 37
1666     #if !defined NOPOSIX
1667 nigel 3 case 'P': do_posix = 1; break;
1668 nigel 37 #endif
1669    
1670 ph10 654 case 'S':
1671 ph10 691 if (do_study == 0)
1672 ph10 612 {
1673 ph10 691 do_study = 1;
1674 ph10 667 if (*pp == '+')
1675     {
1676     study_options |= PCRE_STUDY_JIT_COMPILE;
1677 ph10 691 pp++;
1678     }
1679     }
1680 ph10 667 else
1681     {
1682 ph10 612 do_study = 0;
1683     no_force_study = 1;
1684 ph10 654 }
1685 ph10 612 break;
1686    
1687 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1688 ph10 535 case 'W': options |= PCRE_UCP; break;
1689 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1690 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1691 ph10 126 case 'Z': debug_lengths = 0; break;
1692 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1693 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1694 ph10 545
1695 ph10 541 case 'T':
1696     switch (*pp++)
1697     {
1698     case '0': tables = tables0; break;
1699     case '1': tables = tables1; break;
1700 ph10 545
1701 ph10 541 case '\r':
1702     case '\n':
1703 ph10 545 case ' ':
1704     case 0:
1705 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1706 ph10 545 goto SKIP_DATA;
1707    
1708     default:
1709 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1710 ph10 545 goto SKIP_DATA;
1711 ph10 541 }
1712 ph10 545 break;
1713 nigel 25
1714     case 'L':
1715     ppp = pp;
1716 nigel 93 /* The '\r' test here is so that it works on Windows. */
1717     /* The '0' test is just in case this is an unterminated line. */
1718     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1719 nigel 25 *ppp = 0;
1720     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1721     {
1722     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1723     goto SKIP_DATA;
1724     }
1725 nigel 93 locale_set = 1;
1726 nigel 25 tables = pcre_maketables();
1727     pp = ppp;
1728     break;
1729    
1730 nigel 75 case '>':
1731     to_file = pp;
1732     while (*pp != 0) pp++;
1733     while (isspace(pp[-1])) pp--;
1734     *pp = 0;
1735     break;
1736    
1737 nigel 91 case '<':
1738     {
1739 ph10 756 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1740 ph10 336 {
1741     options |= PCRE_JAVASCRIPT_COMPAT;
1742 ph10 345 pp += 3;
1743 ph10 336 }
1744     else
1745 ph10 345 {
1746 ph10 336 int x = check_newline(pp, outfile);
1747     if (x == 0) goto SKIP_DATA;
1748     options |= x;
1749     while (*pp++ != '>');
1750 ph10 345 }
1751 nigel 91 }
1752     break;
1753    
1754 nigel 77 case '\r': /* So that it works in Windows */
1755     case '\n':
1756     case ' ':
1757     break;
1758 nigel 75
1759 nigel 3 default:
1760     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1761     goto SKIP_DATA;
1762     }
1763     }
1764    
1765 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1766 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1767     local character tables. */
1768 nigel 3
1769 nigel 37 #if !defined NOPOSIX
1770 nigel 3 if (posix || do_posix)
1771     {
1772     int rc;
1773     int cflags = 0;
1774 nigel 75
1775 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1776     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1777 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1778 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1779     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1780 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1781 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1782 nigel 87
1783 ph10 801 first_gotten_store = 0;
1784 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1785    
1786     /* Compilation failed; go back for another re, skipping to blank line
1787     if non-interactive. */
1788    
1789     if (rc != 0)
1790     {
1791 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1792 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1793     goto SKIP_DATA;
1794     }
1795     }
1796    
1797     /* Handle compiling via the native interface */
1798    
1799     else
1800 nigel 37 #endif /* !defined NOPOSIX */
1801    
1802 nigel 3 {
1803 ph10 412 unsigned long int get_options;
1804 ph10 416
1805 nigel 93 if (timeit > 0)
1806 nigel 3 {
1807     register int i;
1808     clock_t time_taken;
1809     clock_t start_time = clock();
1810 nigel 93 for (i = 0; i < timeit; i++)
1811 nigel 3 {
1812 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1813 nigel 3 if (re != NULL) free(re);
1814     }
1815     time_taken = clock() - start_time;
1816 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1817     (((double)time_taken * 1000.0) / (double)timeit) /
1818 nigel 63 (double)CLOCKS_PER_SEC);
1819 nigel 3 }
1820    
1821 ph10 801 first_gotten_store = 0;
1822 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1823 nigel 3
1824     /* Compilation failed; go back for another re, skipping to blank line
1825     if non-interactive. */
1826    
1827     if (re == NULL)
1828     {
1829     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1830     SKIP_DATA:
1831     if (infile != stdin)
1832     {
1833     for (;;)
1834     {
1835 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1836 nigel 11 {
1837     done = 1;
1838     goto CONTINUE;
1839     }
1840 nigel 3 len = (int)strlen((char *)buffer);
1841     while (len > 0 && isspace(buffer[len-1])) len--;
1842     if (len == 0) break;
1843     }
1844     fprintf(outfile, "\n");
1845     }
1846 nigel 25 goto CONTINUE;
1847 nigel 3 }
1848 ph10 416
1849     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1850     within the regex; check for this so that we know how to process the data
1851 ph10 412 lines. */
1852 ph10 416
1853 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1854     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1855 nigel 3
1856 ph10 801 /* Extract the size for possible writing before possibly flipping it,
1857     and remember the store that was got. */
1858 nigel 3
1859 ph10 801 true_size = ((real_pcre *)re)->size;
1860     regex_gotten_store = first_gotten_store;
1861    
1862     /* Output code size information if requested */
1863    
1864 nigel 63 if (log_store)
1865     fprintf(outfile, "Memory allocation (code space): %d\n",
1866 ph10 801 (int)(first_gotten_store -
1867 nigel 63 sizeof(real_pcre) -
1868     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1869    
1870 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1871 ph10 654 help with the matching, unless the pattern has the SS option, which
1872 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
1873     never sensible). */
1874 nigel 75
1875 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
1876 nigel 75 {
1877 nigel 93 if (timeit > 0)
1878 nigel 75 {
1879     register int i;
1880     clock_t time_taken;
1881     clock_t start_time = clock();
1882 nigel 93 for (i = 0; i < timeit; i++)
1883 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1884 nigel 75 time_taken = clock() - start_time;
1885 ph10 667 if (extra != NULL) pcre_free_study(extra);
1886 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1887     (((double)time_taken * 1000.0) / (double)timeit) /
1888 nigel 75 (double)CLOCKS_PER_SEC);
1889     }
1890 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1891 nigel 75 if (error != NULL)
1892     fprintf(outfile, "Failed to study: %s\n", error);
1893     else if (extra != NULL)
1894 ph10 801 {
1895 nigel 75 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1896 ph10 801 if (log_store)
1897     {
1898     size_t jitsize;
1899     new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
1900     if (jitsize != 0)
1901     fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
1902     }
1903     }
1904 nigel 75 }
1905 ph10 512
1906 ph10 510 /* If /K was present, we set up for handling MARK data. */
1907 ph10 512
1908 ph10 510 if (do_mark)
1909     {
1910     if (extra == NULL)
1911     {
1912     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1913     extra->flags = 0;
1914     }
1915 ph10 512 extra->mark = &markptr;
1916 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1917 ph10 512 }
1918 nigel 75
1919     /* If the 'F' option was present, we flip the bytes of all the integer
1920     fields in the regex data block and the study block. This is to make it
1921     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1922     compiled on a different architecture. */
1923    
1924     if (do_flip)
1925     {
1926     real_pcre *rre = (real_pcre *)re;
1927 ph10 259 rre->magic_number =
1928 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1929 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1930     rre->options = byteflip(rre->options, sizeof(rre->options));
1931 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1932 ph10 259 rre->top_bracket =
1933 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1934 ph10 259 rre->top_backref =
1935 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1936 zherczeg 774 rre->first_char =
1937     (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
1938     rre->req_char =
1939     (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
1940 ph10 255 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1941 nigel 75 sizeof(rre->name_table_offset));
1942 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1943 nigel 75 sizeof(rre->name_entry_size));
1944 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1945 ph10 255 sizeof(rre->name_count));
1946 nigel 75
1947     if (extra != NULL)
1948     {
1949     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1950     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1951 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1952     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1953 nigel 75 }
1954     }
1955    
1956 ph10 801 /* Extract information from the compiled data if required. There are now
1957     two info-returning functions. The old one has a limited interface and
1958     returns only limited data. Check that it agrees with the newer one. */
1959 nigel 75
1960     SHOW_INFO:
1961    
1962 nigel 93 if (do_debug)
1963     {
1964     fprintf(outfile, "------------------------------------------------------------------\n");
1965 ph10 116 pcre_printint(re, outfile, debug_lengths);
1966 nigel 93 }
1967 ph10 416
1968 ph10 412 /* We already have the options in get_options (see above) */
1969 nigel 93
1970 nigel 25 if (do_showinfo)
1971 nigel 3 {
1972 ph10 412 unsigned long int all_options;
1973 nigel 79 #if !defined NOINFOCHECK
1974 nigel 43 int old_first_char, old_options, old_count;
1975 nigel 79 #endif
1976 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1977 ph10 227 hascrorlf;
1978 nigel 63 int nameentrysize, namecount;
1979 ph10 756 const pcre_uchar *nametable;
1980 nigel 3
1981 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1982     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1983     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1984 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1985 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1986 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1987     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1988 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1989 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1990     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1991 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1992 nigel 43
1993 nigel 79 #if !defined NOINFOCHECK
1994 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1995 nigel 3 if (count < 0) fprintf(outfile,
1996 nigel 43 "Error %d from pcre_info()\n", count);
1997 nigel 3 else
1998     {
1999 nigel 43 if (old_count != count) fprintf(outfile,
2000     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2001     old_count);
2002 nigel 37
2003 nigel 43 if (old_first_char != first_char) fprintf(outfile,
2004     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2005     first_char, old_first_char);
2006 nigel 37
2007 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
2008     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2009     get_options, old_options);
2010 nigel 43 }
2011 nigel 79 #endif
2012 nigel 43
2013 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
2014 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2015 nigel 77 (int)size, (int)regex_gotten_store);
2016 nigel 43
2017     fprintf(outfile, "Capturing subpattern count = %d\n", count);
2018     if (backrefmax > 0)
2019     fprintf(outfile, "Max back reference = %d\n", backrefmax);
2020 nigel 63
2021     if (namecount > 0)
2022     {
2023     fprintf(outfile, "Named capturing subpatterns:\n");
2024     while (namecount-- > 0)
2025     {
2026     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2027     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2028     GET2(nametable, 0));
2029     nametable += nameentrysize;
2030     }
2031     }
2032 ph10 172
2033 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2034 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2035 nigel 63
2036 nigel 75 all_options = ((real_pcre *)re)->options;
2037 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2038 nigel 75
2039 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
2040 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2041 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2042     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2043     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2044     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2045 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2046 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2047 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2048     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2049 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2050     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2051     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2052 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2053 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2054 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2055 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2056 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2057 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2058 ph10 172
2059 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2060 nigel 43
2061 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2062 nigel 91 {
2063     case PCRE_NEWLINE_CR:
2064     fprintf(outfile, "Forced newline sequence: CR\n");
2065     break;
2066 nigel 43
2067 nigel 91 case PCRE_NEWLINE_LF:
2068     fprintf(outfile, "Forced newline sequence: LF\n");
2069     break;
2070    
2071     case PCRE_NEWLINE_CRLF:
2072     fprintf(outfile, "Forced newline sequence: CRLF\n");
2073     break;
2074    
2075 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2076     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2077     break;
2078    
2079 nigel 93 case PCRE_NEWLINE_ANY:
2080     fprintf(outfile, "Forced newline sequence: ANY\n");
2081     break;
2082    
2083 nigel 91 default:
2084     break;
2085     }
2086    
2087 nigel 43 if (first_char == -1)
2088     {
2089 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2090 nigel 43 }
2091     else if (first_char < 0)
2092     {
2093     fprintf(outfile, "No first char\n");
2094     }
2095     else
2096     {
2097 zherczeg 774 const char *caseless =
2098     ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2099 nigel 63 "" : " (caseless)";
2100 zherczeg 774
2101     if (PRINTHEX(first_char))
2102     fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2103 nigel 3 else
2104 zherczeg 774 fprintf(outfile, "First char = %d%s\n", first_char, caseless);
2105 nigel 43 }
2106 nigel 37
2107 nigel 43 if (need_char < 0)
2108     {
2109     fprintf(outfile, "No need char\n");
2110 nigel 3 }
2111 nigel 43 else
2112     {
2113 zherczeg 774 const char *caseless =
2114     ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2115 nigel 63 "" : " (caseless)";
2116 zherczeg 774
2117     if (PRINTHEX(need_char))
2118     fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2119 nigel 43 else
2120 zherczeg 774 fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2121 nigel 43 }
2122 nigel 75
2123     /* Don't output study size; at present it is in any case a fixed
2124     value, but it varies, depending on the computer architecture, and
2125     so messes up the test suite. (And with the /F option, it might be
2126 ph10 654 flipped.) If study was forced by an external -s, don't show this
2127 ph10 612 information unless -i or -d was also present. This means that, except
2128     when auto-callouts are involved, the output from runs with and without
2129     -s should be identical. */
2130 nigel 75
2131 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2132 nigel 75 {
2133     if (extra == NULL)
2134     fprintf(outfile, "Study returned NULL\n");
2135     else
2136     {
2137 ph10 756 pcre_uint8 *start_bits = NULL;
2138 ph10 455 int minlength;
2139 ph10 461
2140 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2141 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2142    
2143 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2144     if (start_bits == NULL)
2145 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2146 nigel 75 else
2147     {
2148     int i;
2149     int c = 24;
2150     fprintf(outfile, "Starting byte set: ");
2151     for (i = 0; i < 256; i++)
2152     {
2153     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2154     {
2155     if (c > 75)
2156     {
2157     fprintf(outfile, "\n ");
2158     c = 2;
2159     }
2160 nigel 93 if (PRINTHEX(i) && i != ' ')
2161 nigel 75 {
2162     fprintf(outfile, "%c ", i);
2163     c += 2;
2164     }
2165     else
2166     {
2167     fprintf(outfile, "\\x%02x ", i);
2168     c += 5;
2169     }
2170     }
2171     }
2172     fprintf(outfile, "\n");
2173     }
2174     }
2175 ph10 691
2176 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
2177 ph10 691
2178 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2179     {
2180 ph10 691 int jit;
2181 ph10 667 new_info(re, extra, PCRE_INFO_JIT, &jit);
2182 ph10 691 if (jit)
2183     fprintf(outfile, "JIT study was successful\n");
2184     else
2185     #ifdef SUPPORT_JIT
2186     fprintf(outfile, "JIT study was not successful\n");
2187 ph10 667 #else
2188 ph10 691 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2189 ph10 667 #endif
2190 ph10 691 }
2191 nigel 75 }
2192 nigel 3 }
2193    
2194 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2195     that is all. The first 8 bytes of the file are the regex length and then
2196     the study length, in big-endian order. */
2197 nigel 3
2198 nigel 75 if (to_file != NULL)
2199 nigel 3 {
2200 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2201     if (f == NULL)
2202 nigel 3 {
2203 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2204 nigel 3 }
2205 nigel 75 else
2206     {
2207 ph10 756 pcre_uint8 sbuf[8];
2208     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2209     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2210     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2211     sbuf[3] = (pcre_uint8)((true_size) & 255);
2212 ph10 259
2213 ph10 756 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2214     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2215     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2216     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2217 nigel 3
2218 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2219     fwrite(re, 1, true_size, f) < true_size)
2220     {
2221     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2222     }
2223 nigel 3 else
2224     {
2225 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2226 ph10 654
2227 ph10 658 /* If there is study data, write it. */
2228 ph10 654
2229 nigel 75 if (extra != NULL)
2230 nigel 3 {
2231 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2232     true_study_size)
2233 nigel 3 {
2234 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2235     strerror(errno));
2236 nigel 3 }
2237 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2238 nigel 3 }
2239     }
2240 nigel 75 fclose(f);
2241 nigel 3 }
2242 nigel 77
2243     new_free(re);
2244 ph10 667 if (extra != NULL) pcre_free_study(extra);
2245 ph10 545 if (locale_set)
2246 ph10 541 {
2247     new_free((void *)tables);
2248     setlocale(LC_CTYPE, "C");
2249 ph10 545 locale_set = 0;
2250     }
2251 nigel 75 continue; /* With next regex */
2252 nigel 3 }
2253 nigel 75 } /* End of non-POSIX compile */
2254 nigel 3
2255     /* Read data lines and test them */
2256    
2257     for (;;)
2258     {
2259 ph10 756 pcre_uint8 *q;
2260     pcre_uint8 *bptr;
2261 nigel 57 int *use_offsets = offsets;
2262 nigel 53 int use_size_offsets = size_offsets;
2263 nigel 63 int callout_data = 0;
2264     int callout_data_set = 0;
2265 nigel 3 int count, c;
2266 nigel 29 int copystrings = 0;
2267 ph10 386 int find_match_limit = default_find_match_limit;
2268 nigel 29 int getstrings = 0;
2269     int getlist = 0;
2270 nigel 39 int gmatched = 0;
2271 nigel 35 int start_offset = 0;
2272 ph10 579 int start_offset_sign = 1;
2273 nigel 41 int g_notempty = 0;
2274 nigel 77 int use_dfa = 0;
2275 nigel 3
2276     options = 0;
2277    
2278 nigel 91 *copynames = 0;
2279     *getnames = 0;
2280    
2281     copynamesptr = copynames;
2282     getnamesptr = getnames;
2283    
2284 nigel 63 pcre_callout = callout;
2285     first_callout = 1;
2286 ph10 654 last_callout_mark = NULL;
2287 nigel 63 callout_extra = 0;
2288     callout_count = 0;
2289     callout_fail_count = 999999;
2290     callout_fail_id = -1;
2291 nigel 73 show_malloc = 0;
2292 nigel 63
2293 nigel 91 if (extra != NULL) extra->flags &=
2294     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2295    
2296     len = 0;
2297     for (;;)
2298 nigel 11 {
2299 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2300 nigel 91 {
2301 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2302     {
2303 ph10 545 fprintf(outfile, "\n");
2304 ph10 537 break;
2305 ph10 545 }
2306 nigel 91 done = 1;
2307     goto CONTINUE;
2308     }
2309     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2310     len = (int)strlen((char *)buffer);
2311     if (buffer[len-1] == '\n') break;
2312 nigel 11 }
2313 nigel 3
2314     while (len > 0 && isspace(buffer[len-1])) len--;
2315     buffer[len] = 0;
2316     if (len == 0) break;
2317    
2318     p = buffer;
2319     while (isspace(*p)) p++;
2320    
2321 ph10 147 bptr = q = dbuffer;
2322 nigel 3 while ((c = *p++) != 0)
2323     {
2324     int i = 0;
2325     int n = 0;
2326 nigel 63
2327 nigel 3 if (c == '\\') switch ((c = *p++))
2328     {
2329     case 'a': c = 7; break;
2330     case 'b': c = '\b'; break;
2331     case 'e': c = 27; break;
2332     case 'f': c = '\f'; break;
2333     case 'n': c = '\n'; break;
2334     case 'r': c = '\r'; break;
2335     case 't': c = '\t'; break;
2336     case 'v': c = '\v'; break;
2337    
2338     case '0': case '1': case '2': case '3':
2339     case '4': case '5': case '6': case '7':
2340     c -= '0';
2341     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2342     c = c * 8 + *p++ - '0';
2343 nigel 91
2344     #if !defined NOUTF8
2345     if (use_utf8 && c > 255)
2346     {
2347     unsigned char buff8[8];
2348     int ii, utn;
2349     utn = ord2utf8(c, buff8);
2350     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2351     c = buff8[ii]; /* Last byte */
2352     }
2353     #endif
2354 nigel 3 break;
2355    
2356     case 'x':
2357 nigel 49
2358     /* Handle \x{..} specially - new Perl thing for utf8 */
2359    
2360 nigel 79 #if !defined NOUTF8
2361 nigel 49 if (*p == '{')
2362     {
2363     unsigned char *pt = p;
2364     c = 0;
2365 ph10 738
2366 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2367     when isxdigit() is a macro that refers to its argument more than
2368     once. This is banned by the C Standard, but apparently happens in at
2369     least one MacOS environment. */
2370 ph10 738
2371 ph10 735 for (pt++; isxdigit(*pt); pt++)
2372 ph10 734 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2373 nigel 49 if (*pt == '}')
2374     {
2375 nigel 67 unsigned char buff8[8];
2376 nigel 49 int ii, utn;
2377 ph10 355 if (use_utf8)
2378 ph10 358 {
2379 ph10 355 utn = ord2utf8(c, buff8);
2380     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2381     c = buff8[ii]; /* Last byte */
2382     }
2383     else
2384     {
2385 ph10 358 if (c > 255)
2386 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2387     "UTF-8 mode is not enabled.\n"
2388     "** Truncation will probably give the wrong result.\n", c);
2389 ph10 358 }
2390 nigel 49 p = pt + 1;
2391     break;
2392     }
2393     /* Not correct form; fall through */
2394     }
2395 nigel 79 #endif
2396 nigel 49
2397     /* Ordinary \x */
2398    
2399 nigel 3 c = 0;
2400     while (i++ < 2 && isxdigit(*p))
2401     {
2402 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2403 nigel 3 p++;
2404     }
2405     break;
2406    
2407 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2408 nigel 3 p--;
2409     continue;
2410    
2411 nigel 75 case '>':
2412 ph10 579 if (*p == '-')
2413 ph10 567 {
2414     start_offset_sign = -1;
2415     p++;
2416 ph10 579 }
2417 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2418 ph10 579 start_offset *= start_offset_sign;
2419 nigel 75 continue;
2420    
2421 nigel 3 case 'A': /* Option setting */
2422     options |= PCRE_ANCHORED;
2423     continue;
2424    
2425     case 'B':
2426     options |= PCRE_NOTBOL;
2427     continue;
2428    
2429 nigel 29 case 'C':
2430 nigel 63 if (isdigit(*p)) /* Set copy string */
2431     {
2432     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2433     copystrings |= 1 << n;
2434     }
2435     else if (isalnum(*p))
2436     {
2437 ph10 756 pcre_uchar *npp = copynamesptr;
2438 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2439 nigel 91 *npp++ = 0;
2440 nigel 67 *npp = 0;
2441 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2442 nigel 63 if (n < 0)
2443 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2444     copynamesptr = npp;
2445 nigel 63 }
2446     else if (*p == '+')
2447     {
2448     callout_extra = 1;
2449     p++;
2450     }
2451     else if (*p == '-')
2452     {
2453     pcre_callout = NULL;
2454     p++;
2455     }
2456     else if (*p == '!')
2457     {
2458     callout_fail_id = 0;
2459     p++;
2460     while(isdigit(*p))
2461     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2462     callout_fail_count = 0;
2463     if (*p == '!')
2464     {
2465     p++;
2466     while(isdigit(*p))
2467     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2468     }
2469     }
2470     else if (*p == '*')
2471     {
2472     int sign = 1;
2473     callout_data = 0;
2474     if (*(++p) == '-') { sign = -1; p++; }
2475     while(isdigit(*p))
2476     callout_data = callout_data * 10 + *p++ - '0';
2477     callout_data *= sign;
2478     callout_data_set = 1;
2479     }
2480 nigel 29 continue;
2481    
2482 nigel 79 #if !defined NODFA
2483 nigel 77 case 'D':
2484 nigel 79 #if !defined NOPOSIX
2485 nigel 77 if (posix || do_posix)
2486     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2487     else
2488 nigel 79 #endif
2489 nigel 77 use_dfa = 1;
2490     continue;
2491 ph10 553 #endif
2492 nigel 77
2493 ph10 553 #if !defined NODFA
2494 nigel 77 case 'F':
2495     options |= PCRE_DFA_SHORTEST;
2496     continue;
2497 nigel 79 #endif
2498 nigel 77
2499 nigel 29 case 'G':
2500 nigel 63 if (isdigit(*p))
2501     {
2502     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2503     getstrings |= 1 << n;
2504     }
2505     else if (isalnum(*p))
2506     {
2507 ph10 756 pcre_uchar *npp = getnamesptr;
2508 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2509 nigel 91 *npp++ = 0;
2510 nigel 67 *npp = 0;
2511 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2512 nigel 63 if (n < 0)
2513 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2514     getnamesptr = npp;
2515 nigel 63 }
2516 nigel 29 continue;
2517 ph10 691
2518 ph10 667 case 'J':
2519     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2520 ph10 691 if (extra != NULL
2521     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2522 ph10 667 && extra->executable_jit != NULL)
2523 ph10 691 {
2524 ph10 667 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2525     jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2526 ph10 675 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2527 ph10 691 }
2528 ph10 667 continue;
2529 nigel 29
2530     case 'L':
2531     getlist = 1;
2532     continue;
2533    
2534 nigel 63 case 'M':
2535     find_match_limit = 1;
2536     continue;
2537    
2538 nigel 37 case 'N':
2539 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2540     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2541 ph10 461 else
2542 ph10 442 options |= PCRE_NOTEMPTY;
2543 nigel 37 continue;
2544    
2545 nigel 3 case 'O':
2546     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2547 nigel 53 if (n > size_offsets_max)
2548     {
2549     size_offsets_max = n;
2550 nigel 57 free(offsets);
2551 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2552 nigel 53 if (offsets == NULL)
2553     {
2554     printf("** Failed to get %d bytes of memory for offsets vector\n",
2555 ph10 151 (int)(size_offsets_max * sizeof(int)));
2556 nigel 77 yield = 1;
2557     goto EXIT;
2558 nigel 53 }
2559     }
2560     use_size_offsets = n;
2561 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2562 nigel 3 continue;
2563    
2564 nigel 75 case 'P':
2565 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2566 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2567 nigel 75 continue;
2568    
2569 nigel 91 case 'Q':
2570     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2571     if (extra == NULL)
2572     {
2573     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2574     extra->flags = 0;
2575     }
2576     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2577     extra->match_limit_recursion = n;
2578     continue;
2579    
2580     case 'q':
2581     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2582     if (extra == NULL)
2583     {
2584     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2585     extra->flags = 0;
2586     }
2587     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2588     extra->match_limit = n;
2589     continue;
2590    
2591 nigel 79 #if !defined NODFA
2592 nigel 77 case 'R':
2593     options |= PCRE_DFA_RESTART;
2594     continue;
2595 nigel 79 #endif
2596 nigel 77
2597 nigel 73 case 'S':
2598     show_malloc = 1;
2599     continue;
2600 ph10 392
2601 ph10 389 case 'Y':
2602     options |= PCRE_NO_START_OPTIMIZE;
2603 ph10 392 continue;
2604 nigel 73
2605 nigel 3 case 'Z':
2606     options |= PCRE_NOTEOL;
2607     continue;
2608 nigel 71
2609     case '?':
2610     options |= PCRE_NO_UTF8_CHECK;
2611     continue;
2612 nigel 91
2613     case '<':
2614     {
2615     int x = check_newline(p, outfile);
2616     if (x == 0) goto NEXT_DATA;
2617     options |= x;
2618     while (*p++ != '>');
2619     }
2620     continue;
2621 nigel 3 }
2622 nigel 9 *q++ = c;
2623 nigel 3 }
2624 nigel 9 *q = 0;
2625 ph10 530 len = (int)(q - dbuffer);
2626 ph10 545
2627 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2628 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2629 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2630 ph10 371
2631 ph10 363 #if !defined NOPOSIX
2632     if (posix || do_posix)
2633     {
2634     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2635 ph10 371 bptr += buffer_size - len - 1;
2636 ph10 363 }
2637 ph10 371 else
2638     #endif
2639 ph10 363 {
2640     memmove(bptr + buffer_size - len, bptr, len);
2641 ph10 371 bptr += buffer_size - len;
2642     }
2643 nigel 3
2644 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2645     {
2646     printf("**Match limit not relevant for DFA matching: ignored\n");
2647     find_match_limit = 0;
2648     }
2649    
2650 nigel 3 /* Handle matching via the POSIX interface, which does not
2651 nigel 63 support timing or playing with the match limit or callout data. */
2652 nigel 3
2653 nigel 37 #if !defined NOPOSIX
2654 nigel 3 if (posix || do_posix)
2655     {
2656     int rc;
2657     int eflags = 0;
2658 nigel 63 regmatch_t *pmatch = NULL;
2659     if (use_size_offsets > 0)
2660 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2661 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2662     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2663 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2664 nigel 3
2665 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2666 nigel 3
2667     if (rc != 0)
2668     {
2669 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2670 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2671     }
2672 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2673     != 0)
2674     {
2675     fprintf(outfile, "Matched with REG_NOSUB\n");
2676     }
2677 nigel 3 else
2678     {
2679 nigel 7 size_t i;
2680 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2681 nigel 3 {
2682     if (pmatch[i].rm_so >= 0)
2683     {
2684 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2685 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2686     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2687 nigel 3 fprintf(outfile, "\n");
2688 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2689 nigel 35 {
2690 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2691 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2692     outfile);
2693 nigel 35 fprintf(outfile, "\n");
2694     }
2695 nigel 3 }
2696     }
2697     }
2698 nigel 53 free(pmatch);
2699 nigel 3 }
2700    
2701 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2702 nigel 3
2703 nigel 37 else
2704     #endif /* !defined NOPOSIX */
2705    
2706 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2707 nigel 3 {
2708 ph10 512 markptr = NULL;
2709    
2710 nigel 93 if (timeitm > 0)
2711 nigel 3 {
2712     register int i;
2713     clock_t time_taken;
2714     clock_t start_time = clock();
2715 nigel 77
2716 nigel 79 #if !defined NODFA
2717 nigel 77 if (all_use_dfa || use_dfa)
2718     {
2719     int workspace[1000];
2720 nigel 93 for (i = 0; i < timeitm; i++)
2721 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2722 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2723     sizeof(workspace)/sizeof(int));
2724     }
2725     else
2726 nigel 79 #endif
2727 nigel 77
2728 nigel 93 for (i = 0; i < timeitm; i++)
2729 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2730 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2731 nigel 77
2732 nigel 3 time_taken = clock() - start_time;
2733 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2734     (((double)time_taken * 1000.0) / (double)timeitm) /
2735 nigel 63 (double)CLOCKS_PER_SEC);
2736 nigel 3 }
2737    
2738 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2739 nigel 87 varying limits in order to find the minimum value for the match limit and
2740 ph10 667 for the recursion limit. The match limits are relevant only to the normal
2741     running of pcre_exec(), so disable the JIT optimization. This makes it
2742     possible to run the same set of tests with and without JIT externally
2743     requested. */
2744 nigel 63
2745     if (find_match_limit)
2746     {
2747     if (extra == NULL)
2748     {
2749 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2750 nigel 63 extra->flags = 0;
2751     }
2752 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2753 ph10 691
2754 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2755 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2756     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2757     PCRE_ERROR_MATCHLIMIT, "match()");
2758 nigel 63
2759 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2760     options|g_notempty, use_offsets, use_size_offsets,
2761     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2762     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2763 nigel 63 }
2764    
2765     /* If callout_data is set, use the interface with additional data */
2766    
2767     else if (callout_data_set)
2768     {
2769     if (extra == NULL)
2770     {
2771 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2772 nigel 63 extra->flags = 0;
2773     }
2774     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2775 nigel 71 extra->callout_data = &callout_data;
2776 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2777     options | g_notempty, use_offsets, use_size_offsets);
2778     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2779     }
2780    
2781     /* The normal case is just to do the match once, with the default
2782     value of match_limit. */
2783    
2784 nigel 79 #if !defined NODFA
2785 nigel 77 else if (all_use_dfa || use_dfa)
2786     {
2787     int workspace[1000];
2788 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2789 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2790     sizeof(workspace)/sizeof(int));
2791     if (count == 0)
2792     {
2793     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2794     count = use_size_offsets/2;
2795     }
2796     }
2797 nigel 79 #endif
2798 nigel 77
2799 nigel 75 else
2800     {
2801     count = pcre_exec(re, extra, (char *)bptr, len,
2802     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2803 nigel 77 if (count == 0)
2804     {
2805     fprintf(outfile, "Matched, but too many substrings\n");
2806     count = use_size_offsets/3;
2807     }
2808 nigel 75 }
2809 nigel 3
2810 nigel 39 /* Matched */
2811    
2812 nigel 3 if (count >= 0)
2813     {
2814 nigel 93 int i, maxcount;
2815    
2816     #if !defined NODFA
2817     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2818     #endif
2819     maxcount = use_size_offsets/3;
2820    
2821     /* This is a check against a lunatic return value. */
2822    
2823     if (count > maxcount)
2824     {
2825     fprintf(outfile,
2826     "** PCRE error: returned count %d is too big for offset size %d\n",
2827     count, use_size_offsets);
2828     count = use_size_offsets/3;
2829     if (do_g || do_G)
2830     {
2831     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2832     do_g = do_G = FALSE; /* Break g/G loop */
2833     }
2834     }
2835 ph10 654
2836 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
2837     unset ones at the end. */
2838 ph10 654
2839 ph10 626 if (do_allcaps)
2840     {
2841     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2842 ph10 654 count++; /* Allow for full match */
2843     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2844     }
2845 nigel 93
2846 ph10 626 /* Output the captured substrings */
2847 ph10 654
2848 nigel 29 for (i = 0; i < count * 2; i += 2)
2849 nigel 3 {
2850 nigel 57 if (use_offsets[i] < 0)
2851 ph10 654 {
2852 ph10 626 if (use_offsets[i] != -1)
2853     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2854 ph10 654 use_offsets[i], i);
2855 ph10 626 if (use_offsets[i+1] != -1)
2856     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2857 ph10 654 use_offsets[i+1], i+1);
2858 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2859 ph10 654 }
2860 nigel 3 else
2861     {
2862     fprintf(outfile, "%2d: ", i/2);
2863 nigel 63 (void)pchars(bptr + use_offsets[i],
2864     use_offsets[i+1] - use_offsets[i], outfile);
2865 nigel 3 fprintf(outfile, "\n");
2866 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2867 nigel 35 {
2868 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2869     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2870     outfile);
2871     fprintf(outfile, "\n");
2872 nigel 35 }
2873 nigel 3 }
2874     }
2875 ph10 512
2876 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2877 nigel 29
2878     for (i = 0; i < 32; i++)
2879     {
2880     if ((copystrings & (1 << i)) != 0)
2881     {
2882 nigel 91 char copybuffer[256];
2883 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2884 nigel 37 i, copybuffer, sizeof(copybuffer));
2885 nigel 29 if (rc < 0)
2886     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2887     else
2888 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2889 nigel 29 }
2890     }
2891    
2892 nigel 91 for (copynamesptr = copynames;
2893     *copynamesptr != 0;
2894     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2895     {
2896     char copybuffer[256];
2897     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2898     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2899     if (rc < 0)
2900     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2901     else
2902     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2903     }
2904    
2905 nigel 29 for (i = 0; i < 32; i++)
2906     {
2907     if ((getstrings & (1 << i)) != 0)
2908     {
2909     const char *substring;
2910 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2911 nigel 29 i, &substring);
2912     if (rc < 0)
2913     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2914     else
2915     {
2916     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2917 nigel 49 pcre_free_substring(substring);
2918 nigel 29 }
2919     }
2920     }
2921    
2922 nigel 91 for (getnamesptr = getnames;
2923     *getnamesptr != 0;
2924     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2925     {
2926     const char *substring;
2927     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2928     count, (char *)getnamesptr, &substring);
2929     if (rc < 0)
2930     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2931     else
2932     {
2933     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2934     pcre_free_substring(substring);
2935     }
2936     }
2937    
2938 nigel 29 if (getlist)
2939     {
2940     const char **stringlist;
2941 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2942 nigel 29 &stringlist);
2943     if (rc < 0)
2944     fprintf(outfile, "get substring list failed %d\n", rc);
2945     else
2946     {
2947     for (i = 0; i < count; i++)
2948     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2949     if (stringlist[i] != NULL)
2950     fprintf(outfile, "string list not terminated by NULL\n");
2951 nigel 49 pcre_free_substring_list(stringlist);
2952 nigel 29 }
2953     }
2954 nigel 39 }
2955 nigel 29
2956 nigel 75 /* There was a partial match */
2957    
2958     else if (count == PCRE_ERROR_PARTIAL)
2959     {
2960 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2961     else fprintf(outfile, "Partial match, mark=%s", markptr);
2962 ph10 426 if (use_size_offsets > 1)
2963     {
2964     fprintf(outfile, ": ");
2965     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2966 ph10 461 outfile);
2967     }
2968 nigel 77 fprintf(outfile, "\n");
2969 nigel 75 break; /* Out of the /g loop */
2970     }
2971    
2972 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2973 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2974     to advance the start offset, and continue. We won't be at the end of the
2975     string - that was checked before setting g_notempty.
2976 nigel 39
2977 ph10 566 Complication arises in the case when the newline convention is "any",
2978 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2979     terminated by CRLF, an advance of one character just passes the \r,
2980 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2981 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2982     newline setting in the pattern; if none was set, use pcre_config() to
2983 ph10 566 find the default.
2984 ph10 144
2985 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2986     character, not one byte. */
2987    
2988 nigel 3 else
2989     {
2990 nigel 41 if (g_notempty != 0)
2991 nigel 35 {
2992 nigel 73 int onechar = 1;
2993 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2994 nigel 57 use_offsets[0] = start_offset;
2995 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2996     {
2997     int d;
2998     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2999 ph10 391 /* Note that these values are always the ASCII ones, even in
3000     EBCDIC environments. CR = 13, NL = 10. */
3001     obits = (d == 13)? PCRE_NEWLINE_CR :
3002     (d == 10)? PCRE_NEWLINE_LF :
3003     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3004 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
3005 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
3006     }
3007 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3008 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3009 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3010 ph10 149 &&
3011 ph10 143 start_offset < len - 1 &&
3012     bptr[start_offset] == '\r' &&
3013     bptr[start_offset+1] == '\n')
3014 ph10 144 onechar++;
3015 ph10 143 else if (use_utf8)
3016 nigel 73 {
3017     while (start_offset + onechar < len)
3018     {
3019 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3020 ph10 579 onechar++;
3021 nigel 73 }
3022     }
3023     use_offsets[1] = start_offset + onechar;
3024 nigel 35 }
3025 nigel 41 else
3026     {
3027 ph10 598 switch(count)
3028 ph10 654 {
3029 ph10 598 case PCRE_ERROR_NOMATCH:
3030 ph10 512 if (gmatched == 0)
3031 ph10 510 {
3032     if (markptr == NULL) fprintf(outfile, "No match\n");
3033     else fprintf(outfile, "No match, mark = %s\n", markptr);
3034 ph10 512 }
3035 ph10 598 break;
3036 ph10 654
3037 ph10 598 case PCRE_ERROR_BADUTF8:
3038     case PCRE_ERROR_SHORTUTF8:
3039     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3040     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3041     if (use_size_offsets >= 2)
3042 ph10 654 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3043 ph10 598 use_offsets[1]);
3044 ph10 654 fprintf(outfile, "\n");
3045     break;
3046    
3047 ph10 598 default:
3048 ph10 654 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3049 ph10 604 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3050 ph10 654 else
3051     fprintf(outfile, "Error %d (Unexpected value)\n", count);
3052 ph10 598 break;
3053 nigel 41 }
3054 ph10 654
3055 nigel 41 break; /* Out of the /g loop */
3056     }
3057 nigel 3 }
3058 nigel 35
3059 nigel 39 /* If not /g or /G we are done */
3060    
3061     if (!do_g && !do_G) break;
3062    
3063 nigel 41 /* If we have matched an empty string, first check to see if we are at
3064 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3065     Perl's /g option does. This turns out to be rather cunning. First we set
3066     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3067 nigel 47 same point. If this fails (picked up above) we advance to the next
3068 ph10 143 character. */
3069 ph10 142
3070 nigel 41 g_notempty = 0;
3071 ph10 142
3072 nigel 57 if (use_offsets[0] == use_offsets[1])
3073 nigel 41 {
3074 nigel 57 if (use_offsets[0] == len) break;
3075 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3076 nigel 41 }
3077 nigel 39
3078     /* For /g, update the start offset, leaving the rest alone */
3079    
3080 ph10 143 if (do_g) start_offset = use_offsets[1];
3081 nigel 39
3082     /* For /G, update the pointer and length */
3083    
3084     else
3085 nigel 35 {
3086 ph10 143 bptr += use_offsets[1];
3087     len -= use_offsets[1];
3088 nigel 35 }
3089 nigel 39 } /* End of loop for /g and /G */
3090 nigel 91
3091     NEXT_DATA: continue;
3092 nigel 39 } /* End of loop for data lines */
3093 nigel 3
3094 nigel 11 CONTINUE:
3095 nigel 37
3096     #if !defined NOPOSIX
3097 nigel 3 if (posix || do_posix) regfree(&preg);
3098 nigel 37 #endif
3099    
3100 nigel 77 if (re != NULL) new_free(re);
3101 ph10 667 if (extra != NULL) pcre_free_study(extra);
3102 ph10 541 if (locale_set)
3103 nigel 25 {
3104 nigel 77 new_free((void *)tables);
3105 nigel 25 setlocale(LC_CTYPE, "C");
3106 nigel 93 locale_set = 0;
3107 nigel 25 }
3108 ph10 691 if (jit_stack != NULL)
3109 ph10 667 {
3110     pcre_jit_stack_free(jit_stack);
3111 ph10 691 jit_stack = NULL;
3112     }
3113 nigel 3 }
3114    
3115 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3116 nigel 77
3117     EXIT:
3118    
3119     if (infile != NULL && infile != stdin) fclose(infile);
3120     if (outfile != NULL && outfile != stdout) fclose(outfile);
3121    
3122     free(buffer);
3123     free(dbuffer);
3124     free(pbuffer);
3125     free(offsets);
3126    
3127     return yield;
3128 nigel 3 }
3129    
3130 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12