/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Contents of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 756 - (hide annotations) (download)
Mon Nov 21 10:48:42 2011 UTC (2 years, 4 months ago) by ph10
File MIME type: text/plain
File size: 93968 byte(s)
Apply Zoltan's big patch.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 ph10 667 #define _pcre_ucp_typerange ucp_typerange
116 nigel 85 #define _pcre_utf8_table1 utf8_table1
117     #define _pcre_utf8_table1_size utf8_table1_size
118     #define _pcre_utf8_table2 utf8_table2
119     #define _pcre_utf8_table3 utf8_table3
120     #define _pcre_utf8_table4 utf8_table4
121     #define _pcre_utt utt
122     #define _pcre_utt_size utt_size
123 ph10 240 #define _pcre_utt_names utt_names
124 nigel 85 #define _pcre_OP_lengths OP_lengths
125    
126     #include "pcre_tables.c"
127    
128     /* We also need the pcre_printint() function for printing out compiled
129     patterns. This function is in a separate file so that it can be included in
130 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
131 ph10 498 know which case is being compiled. */
132 nigel 85
133 ph10 498 #define COMPILING_PCRETEST
134     #include "pcre_printint.src"
135    
136     /* The definition of the macro PRINTABLE, which determines whether to print an
137 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
138 ph10 498 contained in the printint.src file. We use it here also, in cases when the
139     locale has not been explicitly changed, so as to get consistent output from
140     systems that differ in their output from isprint() even in the "C" locale. */
141 nigel 93
142     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
143 nigel 85
144 nigel 37 /* It is possible to compile this test program without including support for
145     testing the POSIX interface, though this is not available via the standard
146     Makefile. */
147    
148     #if !defined NOPOSIX
149 nigel 3 #include "pcreposix.h"
150 nigel 37 #endif
151 nigel 3
152 ph10 107 /* It is also possible, for the benefit of the version currently imported into
153     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
154     interface to the DFA matcher (NODFA), and without the doublecheck of the old
155     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
156     UTF8 support if PCRE is built without it. */
157 nigel 79
158 ph10 107 #ifndef SUPPORT_UTF8
159     #ifndef NOUTF8
160     #define NOUTF8
161     #endif
162     #endif
163 nigel 79
164 ph10 107
165 nigel 85 /* Other parameters */
166    
167 nigel 3 #ifndef CLOCKS_PER_SEC
168     #ifdef CLK_TCK
169     #define CLOCKS_PER_SEC CLK_TCK
170     #else
171     #define CLOCKS_PER_SEC 100
172     #endif
173     #endif
174    
175 nigel 93 /* This is the default loop count for timing. */
176    
177 nigel 75 #define LOOPREPEAT 500000
178 nigel 3
179 nigel 85 /* Static variables: file-scope state shared by the functions in this file.
Only the variables whose use is visible near here are commented individually. */
180    
181 nigel 3 static FILE *outfile;
182     static int log_store = 0;
183 nigel 63 static int callout_count;
184     static int callout_extra;
185     static int callout_fail_count;
186     static int callout_fail_id;
187 ph10 210 static int debug_lengths;
188 nigel 63 static int first_callout;
189 nigel 93 static int locale_set = 0; /* Tested by PRINTHEX: non-zero selects isprint(), zero selects PRINTABLE() */
190 nigel 73 static int show_malloc;
191 nigel 67 static int use_utf8; /* Non-zero makes pchars() decode its input as UTF-8 */
192 nigel 43 static size_t gotten_store;
193 ph10 645 static const unsigned char *last_callout_mark = NULL;
194 nigel 3
195 nigel 91 /* The buffers grow automatically if very long input lines are encountered.
extend_inputline() doubles buffer_size and reallocates all three buffers
together, copying the old contents of buffer and pbuffer but not of dbuffer. */
196    
197     static int buffer_size = 50000; /* Current size, in bytes, of each of the three buffers */
198 ph10 756 static pcre_uint8 *buffer = NULL; /* Input lines are read into this buffer */
199     static pcre_uint8 *dbuffer = NULL; /* Contents are not preserved when the buffers are expanded */
200     static pcre_uint8 *pbuffer = NULL; /* Copy of the input for callouts; preserved on expansion */
201 nigel 3
202 ph10 598 /* Textual explanations for runtime error codes. A NULL entry marks a code
that has no generic text here: it is either reported by dedicated code or is
never returned by the matching functions, as the per-entry comments say.
NOTE(review): the entries appear to be indexed by the negated PCRE error code
(index 0 = no error, index 1 = NOMATCH, ...); confirm against the code that
reads this table, which is not visible here. */
203 nigel 75
204 ph10 598 static const char *errtexts[] = {
205     NULL, /* 0 is no error */
206     NULL, /* NOMATCH is handled specially */
207     "NULL argument passed",
208     "bad option value",
209     "magic number missing",
210     "unknown opcode - pattern overwritten?",
211     "no more memory",
212 ph10 654 NULL, /* never returned by pcre_exec() or pcre_dfa_exec() */
213 ph10 598 "match limit exceeded",
214     "callout error code",
215     NULL, /* BADUTF8 is handled specially */
216     "bad UTF-8 offset",
217     NULL, /* PARTIAL is handled specially */
218     "not used - internal error",
219     "internal error - pattern overwritten?",
220     "bad count value",
221     "item unsupported for DFA matching",
222     "backreference condition or recursion test not supported for DFA matching",
223     "match limit not supported for DFA matching",
224     "workspace size exceeded in DFA matching",
225 ph10 654 "too much recursion for DFA matching",
226 ph10 598 "recursion limit exceeded",
227     "not used - internal error",
228     "invalid combination of newline options",
229     "bad offset value",
230 ph10 642 NULL, /* SHORTUTF8 is handled specially */
231 ph10 676 "nested recursion at the same subject position",
232 ph10 691 "JIT stack limit reached"
233 ph10 598 };
234    
235 ph10 654
236 ph10 541 /*************************************************
237     * Alternate character tables *
238     *************************************************/
239 nigel 49
240 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
241     using the default tables of the library. However, the T option can be used to
242     select alternate sets of tables, for different kinds of testing. Note also that
243 ph10 541 the L (locale) option also adjusts the tables. */
244    
245 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
246 ph10 541 only ASCII characters. */
247    
248     static const unsigned char tables0[] = {
/* Layout: four sub-tables stored back to back, 1088 bytes in total:
     256 bytes  lower casing table
     256 bytes  case flipping table
     320 bytes  character class bit maps (10 maps of 32 bytes each)
     256 bytes  character type flags
Each sub-table is indexed by character code (the bit maps by code/8, bit
code%8). Only ASCII characters are given any properties. */
249    
250     /* This table is a lower casing table. */
251    
252     0, 1, 2, 3, 4, 5, 6, 7,
253     8, 9, 10, 11, 12, 13, 14, 15,
254     16, 17, 18, 19, 20, 21, 22, 23,
255     24, 25, 26, 27, 28, 29, 30, 31,
256     32, 33, 34, 35, 36, 37, 38, 39,
257     40, 41, 42, 43, 44, 45, 46, 47,
258     48, 49, 50, 51, 52, 53, 54, 55,
259     56, 57, 58, 59, 60, 61, 62, 63,
260     64, 97, 98, 99,100,101,102,103,
261     104,105,106,107,108,109,110,111,
262     112,113,114,115,116,117,118,119,
263     120,121,122, 91, 92, 93, 94, 95,
264     96, 97, 98, 99,100,101,102,103,
265     104,105,106,107,108,109,110,111,
266     112,113,114,115,116,117,118,119,
267     120,121,122,123,124,125,126,127,
268     128,129,130,131,132,133,134,135,
269     136,137,138,139,140,141,142,143,
270     144,145,146,147,148,149,150,151,
271     152,153,154,155,156,157,158,159,
272     160,161,162,163,164,165,166,167,
273     168,169,170,171,172,173,174,175,
274     176,177,178,179,180,181,182,183,
275     184,185,186,187,188,189,190,191,
276     192,193,194,195,196,197,198,199,
277     200,201,202,203,204,205,206,207,
278     208,209,210,211,212,213,214,215,
279     216,217,218,219,220,221,222,223,
280     224,225,226,227,228,229,230,231,
281     232,233,234,235,236,237,238,239,
282     240,241,242,243,244,245,246,247,
283     248,249,250,251,252,253,254,255,
284    
285     /* This table is a case flipping table. */
286    
287     0, 1, 2, 3, 4, 5, 6, 7,
288     8, 9, 10, 11, 12, 13, 14, 15,
289     16, 17, 18, 19, 20, 21, 22, 23,
290     24, 25, 26, 27, 28, 29, 30, 31,
291     32, 33, 34, 35, 36, 37, 38, 39,
292     40, 41, 42, 43, 44, 45, 46, 47,
293     48, 49, 50, 51, 52, 53, 54, 55,
294     56, 57, 58, 59, 60, 61, 62, 63,
295     64, 97, 98, 99,100,101,102,103,
296     104,105,106,107,108,109,110,111,
297     112,113,114,115,116,117,118,119,
298     120,121,122, 91, 92, 93, 94, 95,
299     96, 65, 66, 67, 68, 69, 70, 71,
300     72, 73, 74, 75, 76, 77, 78, 79,
301     80, 81, 82, 83, 84, 85, 86, 87,
302     88, 89, 90,123,124,125,126,127,
303     128,129,130,131,132,133,134,135,
304     136,137,138,139,140,141,142,143,
305     144,145,146,147,148,149,150,151,
306     152,153,154,155,156,157,158,159,
307     160,161,162,163,164,165,166,167,
308     168,169,170,171,172,173,174,175,
309     176,177,178,179,180,181,182,183,
310     184,185,186,187,188,189,190,191,
311     192,193,194,195,196,197,198,199,
312     200,201,202,203,204,205,206,207,
313     208,209,210,211,212,213,214,215,
314     216,217,218,219,220,221,222,223,
315     224,225,226,227,228,229,230,231,
316     232,233,234,235,236,237,238,239,
317     240,241,242,243,244,245,246,247,
318     248,249,250,251,252,253,254,255,
319    
320     /* This table contains bit maps for various character classes. Each map is 32
321     bytes long and the bits run from the least significant end of each byte. The
322     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
323     graph, print, punct, and cntrl. Other classes are built from combinations. */
324    
/* space: characters 9-13 and 32 */
325     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
326     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
327     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329    
/* xdigit: 0-9, A-F, a-f */
330     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
331     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
332     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334    
/* digit: 0-9 */
335     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
337     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339    
/* upper: A-Z */
340     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
341     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
342     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
343     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
344    
/* lower: a-z */
345     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
346     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
347     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
348     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
349    
/* word: 0-9, A-Z, underscore, a-z */
350     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
351     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
352     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
353     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
354    
/* graph: characters 33-126 */
355     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
356     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
357     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
358     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
359    
/* print: characters 32-126 */
360     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
361     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
362     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
363     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
364    
/* punct: the graph characters that are neither letters nor digits */
365     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
366     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
367     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
369    
/* cntrl: characters 0-31 and 127 */
370     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
371     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
372     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
373     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
374    
375     /* This table identifies various classes of character by individual bits:
376     0x01 white space character
377     0x02 letter
378     0x04 decimal digit
379     0x08 hexadecimal digit
380     0x10 alphanumeric or '_'
381     0x80 regular expression metacharacter or binary zero
382     */
383    
384     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
385     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
386     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
387     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
388     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* space - ' */
389     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
390     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
391     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
392     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
393     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
394     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
395     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
396     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
397     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
398     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
399     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
400     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
401     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
402     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
403     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
404     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
405     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
406     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
407     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
408     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
409     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
410     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
411     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
412     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
413     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
414     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
415     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
416    
417 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
418     be at least an approximation of ISO 8859. In particular, there are characters
419 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
420    
421     static const unsigned char tables1[] = {
/* Layout: the same four back-to-back sub-tables as tables0 (1088 bytes in
total), written in decimal and with properties assigned to some characters
above 127. */

/* Lower casing table (256 bytes). Besides A-Z, codes 192-214 and 216-222 map
to the code 32 higher. */
422     0,1,2,3,4,5,6,7,
423     8,9,10,11,12,13,14,15,
424     16,17,18,19,20,21,22,23,
425     24,25,26,27,28,29,30,31,
426     32,33,34,35,36,37,38,39,
427     40,41,42,43,44,45,46,47,
428     48,49,50,51,52,53,54,55,
429     56,57,58,59,60,61,62,63,
430     64,97,98,99,100,101,102,103,
431     104,105,106,107,108,109,110,111,
432     112,113,114,115,116,117,118,119,
433     120,121,122,91,92,93,94,95,
434     96,97,98,99,100,101,102,103,
435     104,105,106,107,108,109,110,111,
436     112,113,114,115,116,117,118,119,
437     120,121,122,123,124,125,126,127,
438     128,129,130,131,132,133,134,135,
439     136,137,138,139,140,141,142,143,
440     144,145,146,147,148,149,150,151,
441     152,153,154,155,156,157,158,159,
442     160,161,162,163,164,165,166,167,
443     168,169,170,171,172,173,174,175,
444     176,177,178,179,180,181,182,183,
445     184,185,186,187,188,189,190,191,
446     224,225,226,227,228,229,230,231,
447     232,233,234,235,236,237,238,239,
448     240,241,242,243,244,245,246,215,
449     248,249,250,251,252,253,254,223,
450     224,225,226,227,228,229,230,231,
451     232,233,234,235,236,237,238,239,
452     240,241,242,243,244,245,246,247,
453     248,249,250,251,252,253,254,255,
/* Case flipping table (256 bytes). */
454     0,1,2,3,4,5,6,7,
455     8,9,10,11,12,13,14,15,
456     16,17,18,19,20,21,22,23,
457     24,25,26,27,28,29,30,31,
458     32,33,34,35,36,37,38,39,
459     40,41,42,43,44,45,46,47,
460     48,49,50,51,52,53,54,55,
461     56,57,58,59,60,61,62,63,
462     64,97,98,99,100,101,102,103,
463     104,105,106,107,108,109,110,111,
464     112,113,114,115,116,117,118,119,
465     120,121,122,91,92,93,94,95,
466     96,65,66,67,68,69,70,71,
467     72,73,74,75,76,77,78,79,
468     80,81,82,83,84,85,86,87,
469     88,89,90,123,124,125,126,127,
470     128,129,130,131,132,133,134,135,
471     136,137,138,139,140,141,142,143,
472     144,145,146,147,148,149,150,151,
473     152,153,154,155,156,157,158,159,
474     160,161,162,163,164,165,166,167,
475     168,169,170,171,172,173,174,175,
476     176,177,178,179,180,181,182,183,
477     184,185,186,187,188,189,190,191,
478     224,225,226,227,228,229,230,231,
479     232,233,234,235,236,237,238,239,
480     240,241,242,243,244,245,246,215,
481     248,249,250,251,252,253,254,223,
482     192,193,194,195,196,197,198,199,
483     200,201,202,203,204,205,206,207,
484     208,209,210,211,212,213,214,247,
485     216,217,218,219,220,221,222,255,
/* Character class bit maps (10 maps of 32 bytes, i.e. four lines each), in the
same order as in tables0: space, xdigit, digit, upper, lower, word, graph,
print, punct, cntrl. */
486     0,62,0,0,1,0,0,0,
487     0,0,0,0,0,0,0,0,
488     32,0,0,0,1,0,0,0,
489     0,0,0,0,0,0,0,0,
490     0,0,0,0,0,0,255,3,
491     126,0,0,0,126,0,0,0,
492     0,0,0,0,0,0,0,0,
493     0,0,0,0,0,0,0,0,
494     0,0,0,0,0,0,255,3,
495     0,0,0,0,0,0,0,0,
496     0,0,0,0,0,0,12,2,
497     0,0,0,0,0,0,0,0,
498     0,0,0,0,0,0,0,0,
499     254,255,255,7,0,0,0,0,
500     0,0,0,0,0,0,0,0,
501     255,255,127,127,0,0,0,0,
502     0,0,0,0,0,0,0,0,
503     0,0,0,0,254,255,255,7,
504     0,0,0,0,0,4,32,4,
505     0,0,0,128,255,255,127,255,
506     0,0,0,0,0,0,255,3,
507     254,255,255,135,254,255,255,7,
508     0,0,0,0,0,4,44,6,
509     255,255,127,255,255,255,127,255,
510     0,0,0,0,254,255,255,255,
511     255,255,255,255,255,255,255,127,
512     0,0,0,0,254,255,255,255,
513     255,255,255,255,255,255,255,255,
514     0,2,0,0,255,255,255,255,
515     255,255,255,255,255,255,255,127,
516     0,0,0,0,255,255,255,255,
517     255,255,255,255,255,255,255,255,
518     0,0,0,0,254,255,0,252,
519     1,0,0,248,1,0,0,120,
520     0,0,0,0,254,255,255,255,
521     0,0,128,0,0,0,128,0,
522     255,255,255,255,0,0,0,0,
523     0,0,0,0,0,0,0,128,
524     255,255,255,255,0,0,0,0,
525     0,0,0,0,0,0,0,0,
/* Character type flags (256 bytes), using the same bit values as the
corresponding table in tables0 (1 space, 2 letter, 4 digit, 8 hex digit,
16 word character, 128 metacharacter or binary zero). */
526     128,0,0,0,0,0,0,0,
527     0,1,1,0,1,1,0,0,
528     0,0,0,0,0,0,0,0,
529     0,0,0,0,0,0,0,0,
530     1,0,0,0,128,0,0,0,
531     128,128,128,128,0,0,128,0,
532     28,28,28,28,28,28,28,28,
533     28,28,0,0,0,0,0,128,
534     0,26,26,26,26,26,26,18,
535     18,18,18,18,18,18,18,18,
536     18,18,18,18,18,18,18,18,
537     18,18,18,128,128,0,128,16,
538     0,26,26,26,26,26,26,18,
539     18,18,18,18,18,18,18,18,
540     18,18,18,18,18,18,18,18,
541     18,18,18,128,128,0,0,0,
542     0,0,0,0,0,1,0,0,
543     0,0,0,0,0,0,0,0,
544     0,0,0,0,0,0,0,0,
545     0,0,0,0,0,0,0,0,
546     1,0,0,0,0,0,0,0,
547     0,0,18,0,0,0,0,0,
548     0,0,20,20,0,18,0,0,
549     0,20,18,0,0,0,0,0,
550     18,18,18,18,18,18,18,18,
551     18,18,18,18,18,18,18,18,
552     18,18,18,18,18,18,18,0,
553     18,18,18,18,18,18,18,18,
554     18,18,18,18,18,18,18,18,
555     18,18,18,18,18,18,18,18,
556     18,18,18,18,18,18,18,0,
557     18,18,18,18,18,18,18,18
558     };
559    
560    
561    
562 ph10 558
#ifndef HAVE_STRERROR
/*************************************************
*     Provide strerror() for non-ANSI libraries  *
*************************************************/

/* A few elderly systems (SunOS4 is one example) have no strerror() in their C
library, but they do export the table of messages and its length, so an
equivalent function can be built on top of those.

Argument:
  n            the error number to describe

Returns:       the matching entry of sys_errlist, or a fixed "unknown" text
               when n lies outside the range 0 .. sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
582    
583    
584 ph10 667 /*************************************************
585     * JIT memory callback *
586     *************************************************/
587 ph10 558
588 ph10 667 static pcre_jit_stack* jit_callback(void *arg)
589     {
590     return (pcre_jit_stack *)arg;
591     }
592 ph10 558
593 ph10 667
594 ph10 558 /*************************************************
595 nigel 91 * Read or extend an input line *
596     *************************************************/
597    
598     /* Input lines are read into buffer, but both patterns and data lines can be
599     continued over multiple input lines. In addition, if the buffer fills up, we
600     want to automatically expand it so as to be able to handle extremely large
601     lines that are needed for certain stress tests. When the input buffer is
602     expanded, the other two buffers must also be expanded likewise, and the
603     contents of pbuffer, which are a copy of the input for callouts, must be
604     preserved (for when expansion happens for a data line). This is not the most
605     optimal way of handling this, but hey, this is just a test program!
606    
607     Arguments:
608     f the file to read
609     start where in buffer to start (this *must* be within buffer)
610 ph10 287 prompt for stdin or readline()
611 nigel 91
612     Returns: pointer to the start of new data
613     could be a copy of start, or could be moved
614     NULL if no data read and EOF reached
615     */
616    
617 ph10 756 static pcre_uint8 *
618     extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
619 nigel 91 {
620 ph10 756 pcre_uint8 *here = start;
621 nigel 91
622     for (;;)
623     {
624 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
625 nigel 93
626 nigel 91 if (rlen > 1000)
627     {
628     int dlen;
629 ph10 289
630 ph10 287 /* If libreadline support is required, use readline() to read a line if the
631     input is a terminal. Note that readline() removes the trailing newline, so
632     we must put it back again, to be compatible with fgets(). */
633 ph10 289
634 ph10 287 #ifdef SUPPORT_LIBREADLINE
635     if (isatty(fileno(f)))
636     {
637 ph10 289 size_t len;
638 ph10 287 char *s = readline(prompt);
639     if (s == NULL) return (here == start)? NULL : start;
640     len = strlen(s);
641 ph10 289 if (len > 0) add_history(s);
642 ph10 287 if (len > rlen - 1) len = rlen - 1;
643     memcpy(here, s, len);
644     here[len] = '\n';
645 ph10 289 here[len+1] = 0;
646     free(s);
647 ph10 287 }
648 ph10 289 else
649     #endif
650    
651 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
652 ph10 289
653 ph10 287 {
654 ph10 516 if (f == stdin) printf("%s", prompt);
655 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
656     return (here == start)? NULL : start;
657 ph10 289 }
658    
659 nigel 91 dlen = (int)strlen((char *)here);
660     if (dlen > 0 && here[dlen - 1] == '\n') return start;
661     here += dlen;
662     }
663    
664     else
665     {
666     int new_buffer_size = 2*buffer_size;
667 ph10 756 pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);
668     pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
669     pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
670 nigel 91
671     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
672     {
673     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
674     exit(1);
675     }
676    
677     memcpy(new_buffer, buffer, buffer_size);
678     memcpy(new_pbuffer, pbuffer, buffer_size);
679    
680     buffer_size = new_buffer_size;
681    
682     start = new_buffer + (start - buffer);
683     here = new_buffer + (here - buffer);
684    
685     free(buffer);
686     free(dbuffer);
687     free(pbuffer);
688    
689     buffer = new_buffer;
690     dbuffer = new_dbuffer;
691     pbuffer = new_pbuffer;
692     }
693     }
694    
695     return NULL; /* Control never gets here */
696     }
697    
698    
699    
700    
701    
702    
703    
/*************************************************
*         Read number from string                *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple.

Leading white space is skipped and then a run of decimal digits is converted.
If there are no digits the result is zero. A number too large for an int
yields the largest int value instead of overflowing; all of its digits are
still consumed, so the end pointer does not depend on the magnitude.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (the first character that is not
                  part of the number)

Returns:        the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
const int limit = (int)(~0u >> 1);   /* Largest int value, without <limits.h> */
int result = 0;

while (isspace(*str)) str++;         /* Stops at the terminating zero too */

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (limit - digit) / 10)
    result = limit;                  /* Would overflow: saturate */
  else
    result = result * 10 + digit;
  }

*endptr = str;
return(result);
}
728    
729    
730    
731 nigel 49
732     /*************************************************
733     * Convert UTF-8 string to value *
734     *************************************************/
735    
736     /* This function takes one or more bytes that represents a UTF-8 character,
737     and returns the value of the character.
738    
739     Argument:
740 nigel 91 utf8bytes a pointer to the byte vector
741     vptr a pointer to an int to receive the value
742 nigel 49
743 nigel 91 Returns: > 0 => the number of bytes consumed
744     -6 to 0 => malformed UTF-8 character at offset = (-return)
745 nigel 49 */
746    
747 nigel 79 #if !defined NOUTF8
748    
749 nigel 67 static int
750 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
751 nigel 49 {
752 nigel 91 int c = *utf8bytes++;
753 nigel 49 int d = c;
754     int i, j, s;
755    
756     for (i = -1; i < 6; i++) /* i is number of additional bytes */
757     {
758     if ((d & 0x80) == 0) break;
759     d <<= 1;
760     }
761    
762     if (i == -1) { *vptr = c; return 1; } /* ascii character */
763     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
764    
765     /* i now has a value in the range 1-5 */
766    
767 nigel 59 s = 6*i;
768 nigel 85 d = (c & utf8_table3[i]) << s;
769 nigel 49
770     for (j = 0; j < i; j++)
771     {
772 nigel 91 c = *utf8bytes++;
773 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
774 nigel 59 s -= 6;
775 nigel 49 d |= (c & 0x3f) << s;
776     }
777    
778     /* Check that encoding was the correct unique one */
779    
780 nigel 85 for (j = 0; j < utf8_table1_size; j++)
781     if (d <= utf8_table1[j]) break;
782 nigel 49 if (j != i) return -(i+1);
783    
784     /* Valid value */
785    
786     *vptr = d;
787     return i+1;
788     }
789    
790 nigel 79 #endif
791 nigel 49
792    
793 nigel 79
794 nigel 63 /*************************************************
795 nigel 85 * Convert character value to UTF-8 *
796     *************************************************/
797    
798     /* This function takes an integer value in the range 0 - 0x7fffffff
799     and encodes it as a UTF-8 character in 0 to 6 bytes.
800    
801     Arguments:
802     cvalue the character value
803 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
804 nigel 85
805     Returns: number of characters placed in the buffer
806     */
807    
808 nigel 93 #if !defined NOUTF8
809    
810 nigel 85 static int
811 ph10 756 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
812 nigel 85 {
813     register int i, j;
814     for (i = 0; i < utf8_table1_size; i++)
815     if (cvalue <= utf8_table1[i]) break;
816 nigel 91 utf8bytes += i;
817 nigel 85 for (j = i; j > 0; j--)
818     {
819 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
820 nigel 85 cvalue >>= 6;
821     }
822 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
823 nigel 85 return i + 1;
824     }
825    
826 nigel 93 #endif
827 nigel 85
828    
829 nigel 93
830 nigel 85 /*************************************************
831 nigel 63 * Print character string *
832     *************************************************/
833 nigel 49
834 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
835     mode. Yields number of characters printed. If handed a NULL file, just counts
836     chars without printing. */
837 nigel 49
838 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
839 nigel 3 {
840 nigel 85 int c = 0;
841 nigel 63 int yield = 0;
842 nigel 3
843 nigel 63 while (length-- > 0)
844 nigel 3 {
845 nigel 79 #if !defined NOUTF8
846 nigel 67 if (use_utf8)
847 nigel 63 {
848     int rc = utf82ord(p, &c);
849 nigel 3
850 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
851     {
852     length -= rc - 1;
853     p += rc;
854 nigel 93 if (PRINTHEX(c))
855 nigel 63 {
856     if (f != NULL) fprintf(f, "%c", c);
857     yield++;
858     }
859     else
860     {
861 nigel 93 int n = 4;
862     if (f != NULL) fprintf(f, "\\x{%02x}", c);
863     yield += (n <= 0x000000ff)? 2 :
864     (n <= 0x00000fff)? 3 :
865     (n <= 0x0000ffff)? 4 :
866     (n <= 0x000fffff)? 5 : 6;
867 nigel 63 }
868     continue;
869     }
870     }
871 nigel 79 #endif
872 nigel 3
873 nigel 63 /* Not UTF-8, or malformed UTF-8 */
874    
875 nigel 93 c = *p++;
876     if (PRINTHEX(c))
877 nigel 3 {
878 nigel 63 if (f != NULL) fprintf(f, "%c", c);
879     yield++;
880 nigel 3 }
881 nigel 63 else
882 nigel 3 {
883 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
884     yield += 4;
885     }
886     }
887 nigel 3
888 nigel 63 return yield;
889     }
890 nigel 23
891 nigel 3
892 nigel 23
893 nigel 63 /*************************************************
894     * Callout function *
895     *************************************************/
896 nigel 3
897 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
898     the match. Yield zero unless more callouts than the fail count, or the callout
899     data is not zero. */
900 nigel 3
901 nigel 63 static int callout(pcre_callout_block *cb)
902     {
903     FILE *f = (first_callout | callout_extra)? outfile : NULL;
904 nigel 75 int i, pre_start, post_start, subject_length;
905 nigel 3
906 nigel 63 if (callout_extra)
907     {
908     fprintf(f, "Callout %d: last capture = %d\n",
909     cb->callout_number, cb->capture_last);
910 nigel 3
911 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
912     {
913     if (cb->offset_vector[i] < 0)
914     fprintf(f, "%2d: <unset>\n", i/2);
915     else
916     {
917     fprintf(f, "%2d: ", i/2);
918     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
919     cb->offset_vector[i+1] - cb->offset_vector[i], f);
920     fprintf(f, "\n");
921     }
922     }
923     }
924 nigel 3
925 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
926     datails. On subsequent calls in the same match, we use pchars just to find the
927     printed lengths of the substrings. */
928 nigel 3
929 nigel 63 if (f != NULL) fprintf(f, "--->");
930 nigel 3
931 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
932     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
933     cb->current_position - cb->start_match, f);
934 nigel 3
935 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
936    
937 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
938     cb->subject_length - cb->current_position, f);
939 nigel 3
940 nigel 63 if (f != NULL) fprintf(f, "\n");
941 nigel 9
942 nigel 63 /* Always print appropriate indicators, with callout number if not already
943 nigel 75 shown. For automatic callouts, show the pattern offset. */
944 nigel 3
945 nigel 75 if (cb->callout_number == 255)
946     {
947     fprintf(outfile, "%+3d ", cb->pattern_position);
948     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
949     }
950     else
951     {
952     if (callout_extra) fprintf(outfile, " ");
953     else fprintf(outfile, "%3d ", cb->callout_number);
954     }
955 nigel 3
956 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
957     fprintf(outfile, "^");
958 nigel 3
959 nigel 63 if (post_start > 0)
960     {
961     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
962     fprintf(outfile, "^");
963 nigel 3 }
964    
965 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
966     fprintf(outfile, " ");
967    
968     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
969     pbuffer + cb->pattern_position);
970    
971 nigel 63 fprintf(outfile, "\n");
972     first_callout = 0;
973 nigel 3
974 ph10 654 if (cb->mark != last_callout_mark)
975 ph10 645 {
976 ph10 654 fprintf(outfile, "Latest Mark: %s\n",
977 ph10 645 (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
978 ph10 654 last_callout_mark = cb->mark;
979     }
980 ph10 645
981 nigel 71 if (cb->callout_data != NULL)
982 nigel 49 {
983 nigel 71 int callout_data = *((int *)(cb->callout_data));
984     if (callout_data != 0)
985     {
986     fprintf(outfile, "Callout data = %d\n", callout_data);
987     return callout_data;
988     }
989 nigel 63 }
990 nigel 49
991 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
992     (++callout_count >= callout_fail_count)? 1 : 0;
993 nigel 3 }
994    
995    
996 nigel 63 /*************************************************
997 nigel 73 * Local malloc functions *
998 nigel 63 *************************************************/
999 nigel 3
1000 ph10 667 /* Alternative malloc function, to test functionality and save the size of a
1001     compiled re. The show_malloc variable is set only during matching. */
1002 nigel 3
1003     static void *new_malloc(size_t size)
1004     {
1005 nigel 73 void *block = malloc(size);
1006 nigel 43 gotten_store = size;
1007 nigel 73 if (show_malloc)
1008 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
1009 nigel 73 return block;
1010 nigel 3 }
1011    
1012 nigel 73 static void new_free(void *block)
1013     {
1014     if (show_malloc)
1015     fprintf(outfile, "free %p\n", block);
1016     free(block);
1017     }
1018 nigel 3
1019 nigel 73 /* For recursion malloc/free, to test stacking calls */
1020    
1021     static void *stack_malloc(size_t size)
1022     {
1023     void *block = malloc(size);
1024     if (show_malloc)
1025 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
1026 nigel 73 return block;
1027     }
1028    
1029     static void stack_free(void *block)
1030     {
1031     if (show_malloc)
1032     fprintf(outfile, "stack_free %p\n", block);
1033     free(block);
1034     }
1035    
1036    
1037 nigel 63 /*************************************************
1038     * Call pcre_fullinfo() *
1039     *************************************************/
1040 nigel 43
1041     /* Get one piece of information from the pcre_fullinfo() function */
1042    
1043     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1044     {
1045     int rc;
1046     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
1047     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
1048     }
1049    
1050    
1051    
1052 nigel 63 /*************************************************
1053 nigel 75 * Byte flipping function *
1054     *************************************************/
1055    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the value to flip
  n        2 to swap the two low-order bytes; any other value is treated as
           a 4-byte flip of the low-order 32 bits

Returns:   the flipped value; bits above those involved are discarded */

static unsigned long int
byteflip(unsigned long int value, int n)
{
/* Pick out the four low-order bytes, least significant first. */

unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2 = (value >> 16) & 0xff;
unsigned long int b3 = (value >> 24) & 0xff;

if (n == 2)
  {
  return (b0 << 8) | b1;
  }

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
1065    
1066    
1067    
1068    
1069     /*************************************************
1070 nigel 87 * Check match or recursion limit *
1071     *************************************************/
1072    
1073     static int
1074 ph10 756 check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1075 nigel 87 int start_offset, int options, int *use_offsets, int use_size_offsets,
1076     int flag, unsigned long int *limit, int errnumber, const char *msg)
1077     {
1078     int count;
1079     int min = 0;
1080     int mid = 64;
1081     int max = -1;
1082    
1083     extra->flags |= flag;
1084    
1085     for (;;)
1086     {
1087     *limit = mid;
1088    
1089     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1090     use_offsets, use_size_offsets);
1091    
1092     if (count == errnumber)
1093     {
1094     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1095     min = mid;
1096     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1097     }
1098    
1099     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1100     count == PCRE_ERROR_PARTIAL)
1101     {
1102     if (mid == min + 1)
1103     {
1104     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1105     break;
1106     }
1107     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1108     max = mid;
1109     mid = (min + mid)/2;
1110     }
1111     else break; /* Some other error */
1112     }
1113    
1114     extra->flags &= ~flag;
1115     return count;
1116     }
1117    
1118    
1119    
1120     /*************************************************
1121 ph10 227 * Case-independent strncmp() function *
1122     *************************************************/
1123    
1124     /*
1125     Arguments:
1126     s first string
1127     t second string
1128     n number of characters to compare
1129    
1130     Returns: < 0, = 0, or > 0, according to the comparison
1131     */
1132    
1133     static int
1134 ph10 756 strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1135 ph10 227 {
1136     while (n--)
1137     {
1138     int c = tolower(*s++) - tolower(*t++);
1139     if (c) return c;
1140     }
1141     return 0;
1142     }
1143    
1144    
1145    
1146     /*************************************************
1147 nigel 91 * Check newline indicator *
1148     *************************************************/
1149    
1150 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1151     a message and return 0 if there is no match.
1152 nigel 91
1153     Arguments:
1154     p points after the leading '<'
1155     f file for error message
1156    
1157     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1158     */
1159    
1160     static int
1161 ph10 756 check_newline(pcre_uint8 *p, FILE *f)
1162 nigel 91 {
1163 ph10 756 if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1164     if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1165     if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1166     if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1167     if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1168     if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1169     if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1170 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1171     return 0;
1172     }
1173    
1174    
1175    
1176     /*************************************************
1177 nigel 93 * Usage function *
1178     *************************************************/
1179    
/* Write the command-line help text to the standard output. The lines that
describe readline(), -dfa and -p depend on SUPPORT_LIBREADLINE, NODFA and
NOPOSIX respectively. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      " -b show compiled code (bytecode)\n"
      " -C show PCRE compile-time options and exit\n"
      " -d debug: show compiled code and information (-b and -i)\n", stdout);

#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif

fputs(" -help show usage information\n"
      " -i show information about compiled patterns\n"
      " -M find MATCH_LIMIT minimum for each subject\n"
      " -m output memory used information\n"
      " -o <n> set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif

fputs(" -q quiet: do not output PCRE version number at start\n"
      " -S <n> set stack size to <n> megabytes\n"
      " -s force each pattern to be studied at basic level\n"
      " -s+ force each pattern to be studied, using JIT if available\n"
      " -t time compilation and execution\n"
      " -t <n> time compilation and execution, repeating <n> times\n"
      " -tm time execution (matching) only\n"
      " -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
1214    
1215    
1216    
1217     /*************************************************
1218 nigel 63 * Main Program *
1219     *************************************************/
1220 nigel 43
1221 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1222     consist of a regular expression, in delimiters and optionally followed by
1223     options, followed by a set of test data, terminated by an empty line. */
1224    
1225     int main(int argc, char **argv)
1226     {
1227     FILE *infile = stdin;
1228     int options = 0;
1229     int study_options = 0;
1230 ph10 386 int default_find_match_limit = FALSE;
1231 nigel 3 int op = 1;
1232     int timeit = 0;
1233 nigel 93 int timeitm = 0;
1234 nigel 3 int showinfo = 0;
1235 nigel 31 int showstore = 0;
1236 ph10 667 int force_study = -1;
1237     int force_study_options = 0;
1238 nigel 87 int quiet = 0;
1239 nigel 53 int size_offsets = 45;
1240     int size_offsets_max;
1241 nigel 77 int *offsets = NULL;
1242 nigel 53 #if !defined NOPOSIX
1243 nigel 3 int posix = 0;
1244 nigel 53 #endif
1245 nigel 3 int debug = 0;
1246 nigel 11 int done = 0;
1247 nigel 77 int all_use_dfa = 0;
1248     int yield = 0;
1249 nigel 91 int stack_size;
1250 nigel 3
1251 ph10 667 pcre_jit_stack *jit_stack = NULL;
1252    
1253    
1254 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1255     that 1024 is plenty long enough for the few names we'll be testing. */
1256 nigel 69
1257 ph10 756 pcre_uchar copynames[1024];
1258     pcre_uchar getnames[1024];
1259 nigel 91
1260 ph10 756 pcre_uchar *copynamesptr;
1261     pcre_uchar *getnamesptr;
1262 nigel 91
1263 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1264 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1265 nigel 69
1266 ph10 756 buffer = (pcre_uint8 *)malloc(buffer_size);
1267     dbuffer = (pcre_uint8 *)malloc(buffer_size);
1268     pbuffer = (pcre_uint8 *)malloc(buffer_size);
1269 nigel 69
1270 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1271 nigel 3
1272 nigel 93 outfile = stdout;
1273    
1274     /* The following _setmode() stuff is some Windows magic that tells its runtime
1275     library to translate CRLF into a single LF character. At least, that's what
1276     I've been told: never having used Windows I take this all on trust. Originally
1277     it set 0x8000, but then I was advised that _O_BINARY was better. */
1278    
1279 nigel 75 #if defined(_WIN32) || defined(WIN32)
1280 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1281     #endif
1282 nigel 75
1283 nigel 3 /* Scan options */
1284    
1285     while (argc > 1 && argv[op][0] == '-')
1286     {
1287 nigel 63 unsigned char *endptr;
1288 nigel 53
1289 ph10 606 if (strcmp(argv[op], "-m") == 0) showstore = 1;
1290 ph10 667 else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1291 ph10 691 else if (strcmp(argv[op], "-s+") == 0)
1292 ph10 667 {
1293     force_study = 1;
1294     force_study_options = PCRE_STUDY_JIT_COMPILE;
1295 ph10 691 }
1296 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1297 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1298 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1299     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1300 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1301 nigel 79 #if !defined NODFA
1302 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1303 nigel 79 #endif
1304 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1305 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1306     *endptr == 0))
1307 nigel 53 {
1308     op++;
1309     argc--;
1310     }
1311 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1312     {
1313     int both = argv[op][2] == 0;
1314     int temp;
1315     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1316     *endptr == 0))
1317     {
1318     timeitm = temp;
1319     op++;
1320     argc--;
1321     }
1322     else timeitm = LOOPREPEAT;
1323     if (both) timeit = timeitm;
1324     }
1325 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1326     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1327     *endptr == 0))
1328     {
1329 ph10 641 #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1330 nigel 91 printf("PCRE: -S not supported on this OS\n");
1331     exit(1);
1332     #else
1333     int rc;
1334     struct rlimit rlim;
1335     getrlimit(RLIMIT_STACK, &rlim);
1336     rlim.rlim_cur = stack_size * 1024 * 1024;
1337     rc = setrlimit(RLIMIT_STACK, &rlim);
1338     if (rc != 0)
1339     {
1340     printf("PCRE: setrlimit() failed with error %d\n", rc);
1341     exit(1);
1342     }
1343     op++;
1344     argc--;
1345     #endif
1346     }
1347 nigel 53 #if !defined NOPOSIX
1348 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1349 nigel 53 #endif
1350 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1351     {
1352     int rc;
1353 ph10 392 unsigned long int lrc;
1354 nigel 63 printf("PCRE version %s\n", pcre_version());
1355     printf("Compiled with\n");
1356     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1357     printf(" %sUTF-8 support\n", rc? "" : "No ");
1358 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1359     printf(" %sUnicode properties support\n", rc? "" : "No ");
1360 ph10 667 (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1361 ph10 674 if (rc)
1362 ph10 689 printf(" Just-in-time compiler support\n");
1363 ph10 674 else
1364     printf(" No just-in-time compiler support\n");
1365 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1366 ph10 391 /* Note that these values are always the ASCII values, even
1367 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1368 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1369     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1370 ph10 150 (rc == -2)? "ANYCRLF" :
1371 nigel 93 (rc == -1)? "ANY" : "???");
1372 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1373     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1374     "all Unicode newlines");
1375 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1376     printf(" Internal link size = %d\n", rc);
1377     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1378     printf(" POSIX malloc threshold = %d\n", rc);
1379 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1380     printf(" Default match limit = %ld\n", lrc);
1381     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1382     printf(" Default recursion depth limit = %ld\n", lrc);
1383 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1384     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1385 ph10 121 goto EXIT;
1386 nigel 63 }
1387 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1388     strcmp(argv[op], "--help") == 0)
1389     {
1390     usage();
1391     goto EXIT;
1392     }
1393 nigel 3 else
1394     {
1395 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1396 nigel 93 usage();
1397 nigel 77 yield = 1;
1398     goto EXIT;
1399 nigel 3 }
1400     op++;
1401     argc--;
1402     }
1403    
1404 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1405    
1406     size_offsets_max = size_offsets;
1407 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1408 nigel 53 if (offsets == NULL)
1409     {
1410     printf("** Failed to get %d bytes of memory for offsets vector\n",
1411 ph10 151 (int)(size_offsets_max * sizeof(int)));
1412 nigel 77 yield = 1;
1413     goto EXIT;
1414 nigel 53 }
1415    
1416 nigel 3 /* Sort out the input and output files */
1417    
1418     if (argc > 1)
1419     {
1420 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1421 nigel 3 if (infile == NULL)
1422     {
1423     printf("** Failed to open %s\n", argv[op]);
1424 nigel 77 yield = 1;
1425     goto EXIT;
1426 nigel 3 }
1427     }
1428    
1429     if (argc > 2)
1430     {
1431 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1432 nigel 3 if (outfile == NULL)
1433     {
1434     printf("** Failed to open %s\n", argv[op+1]);
1435 nigel 77 yield = 1;
1436     goto EXIT;
1437 nigel 3 }
1438     }
1439    
1440     /* Set alternative malloc function */
1441    
1442     pcre_malloc = new_malloc;
1443 nigel 73 pcre_free = new_free;
1444     pcre_stack_malloc = stack_malloc;
1445     pcre_stack_free = stack_free;
1446 nigel 3
1447 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1448 nigel 3
1449 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1450 nigel 3
1451     /* Main loop */
1452    
1453 nigel 11 while (!done)
1454 nigel 3 {
1455     pcre *re = NULL;
1456     pcre_extra *extra = NULL;
1457 nigel 37
1458     #if !defined NOPOSIX /* There are still compilers that require no indent */
1459 nigel 3 regex_t preg;
1460 nigel 45 int do_posix = 0;
1461 nigel 37 #endif
1462    
1463 nigel 7 const char *error;
1464 ph10 512 unsigned char *markptr;
1465 nigel 25 unsigned char *p, *pp, *ppp;
1466 nigel 75 unsigned char *to_file = NULL;
1467 nigel 53 const unsigned char *tables = NULL;
1468 nigel 75 unsigned long int true_size, true_study_size = 0;
1469     size_t size, regex_gotten_store;
1470 ph10 654 int do_allcaps = 0;
1471 ph10 512 int do_mark = 0;
1472 nigel 3 int do_study = 0;
1473 ph10 654 int no_force_study = 0;
1474 nigel 25 int do_debug = debug;
1475 nigel 35 int do_G = 0;
1476     int do_g = 0;
1477 nigel 25 int do_showinfo = showinfo;
1478 nigel 35 int do_showrest = 0;
1479 ph10 616 int do_showcaprest = 0;
1480 nigel 75 int do_flip = 0;
1481 nigel 93 int erroroffset, len, delimiter, poffset;
1482 nigel 3
1483 nigel 67 use_utf8 = 0;
1484 ph10 211 debug_lengths = 1;
1485 nigel 63
1486 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1487 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1488 nigel 63 fflush(outfile);
1489 nigel 3
1490     p = buffer;
1491     while (isspace(*p)) p++;
1492     if (*p == 0) continue;
1493    
1494 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1495 nigel 3
1496 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1497     {
1498 nigel 91 unsigned long int magic, get_options;
1499 ph10 756 pcre_uint8 sbuf[8];
1500 nigel 75 FILE *f;
1501    
1502     p++;
1503     pp = p + (int)strlen((char *)p);
1504     while (isspace(pp[-1])) pp--;
1505     *pp = 0;
1506    
1507     f = fopen((char *)p, "rb");
1508     if (f == NULL)
1509     {
1510     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1511     continue;
1512     }
1513    
1514     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1515    
1516     true_size =
1517     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1518     true_study_size =
1519     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1520    
1521     re = (real_pcre *)new_malloc(true_size);
1522     regex_gotten_store = gotten_store;
1523    
1524     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1525    
1526     magic = ((real_pcre *)re)->magic_number;
1527     if (magic != MAGIC_NUMBER)
1528     {
1529     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1530     {
1531     do_flip = 1;
1532     }
1533     else
1534     {
1535     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1536     fclose(f);
1537     continue;
1538     }
1539     }
1540    
1541 ph10 612 fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1542 nigel 75 do_flip? " (byte-inverted)" : "", p);
1543    
1544     /* Need to know if UTF-8 for printing data strings */
1545    
1546 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1547     use_utf8 = (get_options & PCRE_UTF8) != 0;
1548 nigel 75
1549 ph10 612 /* Now see if there is any following study data. */
1550 nigel 75
1551     if (true_study_size != 0)
1552     {
1553     pcre_study_data *psd;
1554    
1555     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1556     extra->flags = PCRE_EXTRA_STUDY_DATA;
1557    
1558     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1559     extra->study_data = psd;
1560    
1561     if (fread(psd, 1, true_study_size, f) != true_study_size)
1562     {
1563     FAIL_READ:
1564     fprintf(outfile, "Failed to read data from %s\n", p);
1565 ph10 667 if (extra != NULL) pcre_free_study(extra);
1566 nigel 75 if (re != NULL) new_free(re);
1567     fclose(f);
1568     continue;
1569     }
1570     fprintf(outfile, "Study data loaded from %s\n", p);
1571     do_study = 1; /* To get the data output if requested */
1572     }
1573     else fprintf(outfile, "No study data\n");
1574    
1575     fclose(f);
1576     goto SHOW_INFO;
1577     }
1578    
1579     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1580     the pattern; if it isn't complete, read more. */
1581    
1582 nigel 3 delimiter = *p++;
1583    
1584 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1585 nigel 3 {
1586 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1587 nigel 3 goto SKIP_DATA;
1588     }
1589    
1590     pp = p;
1591 ph10 530 poffset = (int)(p - buffer);
1592 nigel 3
1593     for(;;)
1594     {
1595 nigel 29 while (*pp != 0)
1596     {
1597     if (*pp == '\\' && pp[1] != 0) pp++;
1598     else if (*pp == delimiter) break;
1599     pp++;
1600     }
1601 nigel 3 if (*pp != 0) break;
1602 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1603 nigel 3 {
1604     fprintf(outfile, "** Unexpected EOF\n");
1605 nigel 11 done = 1;
1606     goto CONTINUE;
1607 nigel 3 }
1608 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1609 nigel 3 }
1610    
1611 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1612     pointer to the correct relative point in the buffer. */
1613    
1614     p = buffer + poffset;
1615    
1616 nigel 29 /* If the first character after the delimiter is backslash, make
1617     the pattern end with backslash. This is purely to provide a way
1618     of testing for the error message when a pattern ends with backslash. */
1619    
1620     if (pp[1] == '\\') *pp++ = '\\';
1621    
1622 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1623     for callouts. */
1624 nigel 3
1625     *pp++ = 0;
1626 nigel 75 strcpy((char *)pbuffer, (char *)p);
1627 nigel 3
1628     /* Look for options after final delimiter */
1629    
1630     options = 0;
1631 nigel 31 log_store = showstore; /* default from command line */
1632    
1633 nigel 3 while (*pp != 0)
1634     {
1635     switch (*pp++)
1636     {
1637 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1638 nigel 35 case 'g': do_g = 1; break;
1639 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1640     case 'm': options |= PCRE_MULTILINE; break;
1641     case 's': options |= PCRE_DOTALL; break;
1642     case 'x': options |= PCRE_EXTENDED; break;
1643 nigel 25
1644 ph10 616 case '+':
1645 ph10 654 if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1646 ph10 616 break;
1647 ph10 654
1648     case '=': do_allcaps = 1; break;
1649 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1650 nigel 93 case 'B': do_debug = 1; break;
1651 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1652 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1653 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1654 nigel 75 case 'F': do_flip = 1; break;
1655 nigel 35 case 'G': do_G = 1; break;
1656 nigel 25 case 'I': do_showinfo = 1; break;
1657 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1658 ph10 512 case 'K': do_mark = 1; break;
1659 nigel 31 case 'M': log_store = 1; break;
1660 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1661 nigel 37
1662     #if !defined NOPOSIX
1663 nigel 3 case 'P': do_posix = 1; break;
1664 nigel 37 #endif
1665    
1666 ph10 654 case 'S':
1667 ph10 691 if (do_study == 0)
1668 ph10 612 {
1669 ph10 691 do_study = 1;
1670 ph10 667 if (*pp == '+')
1671     {
1672     study_options |= PCRE_STUDY_JIT_COMPILE;
1673 ph10 691 pp++;
1674     }
1675     }
1676 ph10 667 else
1677     {
1678 ph10 612 do_study = 0;
1679     no_force_study = 1;
1680 ph10 654 }
1681 ph10 612 break;
1682    
1683 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1684 ph10 535 case 'W': options |= PCRE_UCP; break;
1685 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1686 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1687 ph10 126 case 'Z': debug_lengths = 0; break;
1688 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1689 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1690 ph10 545
1691 ph10 541 case 'T':
1692     switch (*pp++)
1693     {
1694     case '0': tables = tables0; break;
1695     case '1': tables = tables1; break;
1696 ph10 545
1697 ph10 541 case '\r':
1698     case '\n':
1699 ph10 545 case ' ':
1700     case 0:
1701 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1702 ph10 545 goto SKIP_DATA;
1703    
1704     default:
1705 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1706 ph10 545 goto SKIP_DATA;
1707 ph10 541 }
1708 ph10 545 break;
1709 nigel 25
1710     case 'L':
1711     ppp = pp;
1712 nigel 93 /* The '\r' test here is so that it works on Windows. */
1713     /* The '0' test is just in case this is an unterminated line. */
1714     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1715 nigel 25 *ppp = 0;
1716     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1717     {
1718     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1719     goto SKIP_DATA;
1720     }
1721 nigel 93 locale_set = 1;
1722 nigel 25 tables = pcre_maketables();
1723     pp = ppp;
1724     break;
1725    
1726 nigel 75 case '>':
1727     to_file = pp;
1728     while (*pp != 0) pp++;
1729     while (isspace(pp[-1])) pp--;
1730     *pp = 0;
1731     break;
1732    
1733 nigel 91 case '<':
1734     {
1735 ph10 756 if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
1736 ph10 336 {
1737     options |= PCRE_JAVASCRIPT_COMPAT;
1738 ph10 345 pp += 3;
1739 ph10 336 }
1740     else
1741 ph10 345 {
1742 ph10 336 int x = check_newline(pp, outfile);
1743     if (x == 0) goto SKIP_DATA;
1744     options |= x;
1745     while (*pp++ != '>');
1746 ph10 345 }
1747 nigel 91 }
1748     break;
1749    
1750 nigel 77 case '\r': /* So that it works in Windows */
1751     case '\n':
1752     case ' ':
1753     break;
1754 nigel 75
1755 nigel 3 default:
1756     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1757     goto SKIP_DATA;
1758     }
1759     }
1760    
1761 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1762 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1763     local character tables. */
1764 nigel 3
1765 nigel 37 #if !defined NOPOSIX
1766 nigel 3 if (posix || do_posix)
1767     {
1768     int rc;
1769     int cflags = 0;
1770 nigel 75
1771 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1772     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1773 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1774 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1775     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1776 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1777 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1778 nigel 87
1779 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1780    
1781     /* Compilation failed; go back for another re, skipping to blank line
1782     if non-interactive. */
1783    
1784     if (rc != 0)
1785     {
1786 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1787 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1788     goto SKIP_DATA;
1789     }
1790     }
1791    
1792     /* Handle compiling via the native interface */
1793    
1794     else
1795 nigel 37 #endif /* !defined NOPOSIX */
1796    
1797 nigel 3 {
1798 ph10 412 unsigned long int get_options;
1799 ph10 416
1800 nigel 93 if (timeit > 0)
1801 nigel 3 {
1802     register int i;
1803     clock_t time_taken;
1804     clock_t start_time = clock();
1805 nigel 93 for (i = 0; i < timeit; i++)
1806 nigel 3 {
1807 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1808 nigel 3 if (re != NULL) free(re);
1809     }
1810     time_taken = clock() - start_time;
1811 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1812     (((double)time_taken * 1000.0) / (double)timeit) /
1813 nigel 63 (double)CLOCKS_PER_SEC);
1814 nigel 3 }
1815    
1816 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1817 nigel 3
1818     /* Compilation failed; go back for another re, skipping to blank line
1819     if non-interactive. */
1820    
1821     if (re == NULL)
1822     {
1823     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1824     SKIP_DATA:
1825     if (infile != stdin)
1826     {
1827     for (;;)
1828     {
1829 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1830 nigel 11 {
1831     done = 1;
1832     goto CONTINUE;
1833     }
1834 nigel 3 len = (int)strlen((char *)buffer);
1835     while (len > 0 && isspace(buffer[len-1])) len--;
1836     if (len == 0) break;
1837     }
1838     fprintf(outfile, "\n");
1839     }
1840 nigel 25 goto CONTINUE;
1841 nigel 3 }
1842 ph10 416
1843     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1844     within the regex; check for this so that we know how to process the data
1845 ph10 412 lines. */
1846 ph10 416
1847 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1848     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1849 nigel 3
1850 ph10 412 /* Print information if required. There are now two info-returning
1851     functions. The old one has a limited interface and returns only limited
1852     data. Check that it agrees with the newer one. */
1853 nigel 3
1854 nigel 63 if (log_store)
1855     fprintf(outfile, "Memory allocation (code space): %d\n",
1856     (int)(gotten_store -
1857     sizeof(real_pcre) -
1858     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1859    
1860 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1861     and remember the store that was got. */
1862    
1863     true_size = ((real_pcre *)re)->size;
1864     regex_gotten_store = gotten_store;
1865    
1866 ph10 612 /* If -s or /S was present, study the regex to generate additional info to
1867 ph10 654 help with the matching, unless the pattern has the SS option, which
1868 ph10 612 suppresses the effect of /S (used for a few test patterns where studying is
1869     never sensible). */
1870 nigel 75
1871 ph10 667 if (do_study || (force_study >= 0 && !no_force_study))
1872 nigel 75 {
1873 nigel 93 if (timeit > 0)
1874 nigel 75 {
1875     register int i;
1876     clock_t time_taken;
1877     clock_t start_time = clock();
1878 nigel 93 for (i = 0; i < timeit; i++)
1879 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1880 nigel 75 time_taken = clock() - start_time;
1881 ph10 667 if (extra != NULL) pcre_free_study(extra);
1882 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1883     (((double)time_taken * 1000.0) / (double)timeit) /
1884 nigel 75 (double)CLOCKS_PER_SEC);
1885     }
1886 ph10 667 extra = pcre_study(re, study_options | force_study_options, &error);
1887 nigel 75 if (error != NULL)
1888     fprintf(outfile, "Failed to study: %s\n", error);
1889     else if (extra != NULL)
1890     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1891     }
1892 ph10 512
1893 ph10 510 /* If /K was present, we set up for handling MARK data. */
1894 ph10 512
1895 ph10 510 if (do_mark)
1896     {
1897     if (extra == NULL)
1898     {
1899     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1900     extra->flags = 0;
1901     }
1902 ph10 512 extra->mark = &markptr;
1903 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1904 ph10 512 }
1905 nigel 75
1906     /* If the 'F' option was present, we flip the bytes of all the integer
1907     fields in the regex data block and the study block. This is to make it
1908     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1909     compiled on a different architecture. */
1910    
1911     if (do_flip)
1912     {
1913     real_pcre *rre = (real_pcre *)re;
1914 ph10 259 rre->magic_number =
1915 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1916 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1917     rre->options = byteflip(rre->options, sizeof(rre->options));
1918 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1919 ph10 259 rre->top_bracket =
1920 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1921 ph10 259 rre->top_backref =
1922 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1923 ph10 259 rre->first_byte =
1924 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1925 ph10 259 rre->req_byte =
1926 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1927     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1928 nigel 75 sizeof(rre->name_table_offset));
1929 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1930 nigel 75 sizeof(rre->name_entry_size));
1931 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1932 ph10 255 sizeof(rre->name_count));
1933 nigel 75
1934     if (extra != NULL)
1935     {
1936     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1937     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1938 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1939     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1940 nigel 75 }
1941     }
1942    
1943     /* Extract information from the compiled data if required */
1944    
1945     SHOW_INFO:
1946    
1947 nigel 93 if (do_debug)
1948     {
1949     fprintf(outfile, "------------------------------------------------------------------\n");
1950 ph10 116 pcre_printint(re, outfile, debug_lengths);
1951 nigel 93 }
1952 ph10 416
1953 ph10 412 /* We already have the options in get_options (see above) */
1954 nigel 93
1955 nigel 25 if (do_showinfo)
1956 nigel 3 {
1957 ph10 412 unsigned long int all_options;
1958 nigel 79 #if !defined NOINFOCHECK
1959 nigel 43 int old_first_char, old_options, old_count;
1960 nigel 79 #endif
1961 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1962 ph10 227 hascrorlf;
1963 nigel 63 int nameentrysize, namecount;
1964 ph10 756 const pcre_uchar *nametable;
1965 nigel 3
1966 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1967     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1968     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1969 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1970 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1971 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1972     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1973 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1974 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1975     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1976 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1977 nigel 43
1978 nigel 79 #if !defined NOINFOCHECK
1979 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1980 nigel 3 if (count < 0) fprintf(outfile,
1981 nigel 43 "Error %d from pcre_info()\n", count);
1982 nigel 3 else
1983     {
1984 nigel 43 if (old_count != count) fprintf(outfile,
1985     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1986     old_count);
1987 nigel 37
1988 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1989     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1990     first_char, old_first_char);
1991 nigel 37
1992 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1993     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1994     get_options, old_options);
1995 nigel 43 }
1996 nigel 79 #endif
1997 nigel 43
1998 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1999 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
2000 nigel 77 (int)size, (int)regex_gotten_store);
2001 nigel 43
2002     fprintf(outfile, "Capturing subpattern count = %d\n", count);
2003     if (backrefmax > 0)
2004     fprintf(outfile, "Max back reference = %d\n", backrefmax);
2005 nigel 63
2006     if (namecount > 0)
2007     {
2008     fprintf(outfile, "Named capturing subpatterns:\n");
2009     while (namecount-- > 0)
2010     {
2011     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
2012     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
2013     GET2(nametable, 0));
2014     nametable += nameentrysize;
2015     }
2016     }
2017 ph10 172
2018 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
2019 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2020 nigel 63
2021 nigel 75 all_options = ((real_pcre *)re)->options;
2022 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2023 nigel 75
2024 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
2025 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2026 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2027     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2028     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2029     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2030 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2031 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2032 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2033     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2034 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2035     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2036     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2037 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2038 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
2039 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2040 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
2041 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2042 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2043 ph10 172
2044 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
2045 nigel 43
2046 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
2047 nigel 91 {
2048     case PCRE_NEWLINE_CR:
2049     fprintf(outfile, "Forced newline sequence: CR\n");
2050     break;
2051 nigel 43
2052 nigel 91 case PCRE_NEWLINE_LF:
2053     fprintf(outfile, "Forced newline sequence: LF\n");
2054     break;
2055    
2056     case PCRE_NEWLINE_CRLF:
2057     fprintf(outfile, "Forced newline sequence: CRLF\n");
2058     break;
2059    
2060 ph10 149 case PCRE_NEWLINE_ANYCRLF:
2061     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
2062     break;
2063    
2064 nigel 93 case PCRE_NEWLINE_ANY:
2065     fprintf(outfile, "Forced newline sequence: ANY\n");
2066     break;
2067    
2068 nigel 91 default:
2069     break;
2070     }
2071    
2072 nigel 43 if (first_char == -1)
2073     {
2074 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
2075 nigel 43 }
2076     else if (first_char < 0)
2077     {
2078     fprintf(outfile, "No first char\n");
2079     }
2080     else
2081     {
2082 nigel 63 int ch = first_char & 255;
2083 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
2084 nigel 63 "" : " (caseless)";
2085 nigel 93 if (PRINTHEX(ch))
2086 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
2087 nigel 3 else
2088 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2089 nigel 43 }
2090 nigel 37
2091 nigel 43 if (need_char < 0)
2092     {
2093     fprintf(outfile, "No need char\n");
2094 nigel 3 }
2095 nigel 43 else
2096     {
2097 nigel 63 int ch = need_char & 255;
2098 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2099 nigel 63 "" : " (caseless)";
2100 nigel 93 if (PRINTHEX(ch))
2101 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2102 nigel 43 else
2103 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2104 nigel 43 }
2105 nigel 75
2106     /* Don't output study size; at present it is in any case a fixed
2107     value, but it varies, depending on the computer architecture, and
2108     so messes up the test suite. (And with the /F option, it might be
2109 ph10 654 flipped.) If study was forced by an external -s, don't show this
2110 ph10 612 information unless -i or -d was also present. This means that, except
2111     when auto-callouts are involved, the output from runs with and without
2112     -s should be identical. */
2113 nigel 75
2114 ph10 667 if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2115 nigel 75 {
2116     if (extra == NULL)
2117     fprintf(outfile, "Study returned NULL\n");
2118     else
2119     {
2120 ph10 756 pcre_uint8 *start_bits = NULL;
2121 ph10 455 int minlength;
2122 ph10 461
2123 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2124 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2125    
2126 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2127     if (start_bits == NULL)
2128 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2129 nigel 75 else
2130     {
2131     int i;
2132     int c = 24;
2133     fprintf(outfile, "Starting byte set: ");
2134     for (i = 0; i < 256; i++)
2135     {
2136     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2137     {
2138     if (c > 75)
2139     {
2140     fprintf(outfile, "\n ");
2141     c = 2;
2142     }
2143 nigel 93 if (PRINTHEX(i) && i != ' ')
2144 nigel 75 {
2145     fprintf(outfile, "%c ", i);
2146     c += 2;
2147     }
2148     else
2149     {
2150     fprintf(outfile, "\\x%02x ", i);
2151     c += 5;
2152     }
2153     }
2154     }
2155     fprintf(outfile, "\n");
2156     }
2157     }
2158 ph10 691
2159 ph10 667 /* Show this only if the JIT was set by /S, not by -s. */
2160 ph10 691
2161 ph10 667 if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2162     {
2163 ph10 691 int jit;
2164 ph10 667 new_info(re, extra, PCRE_INFO_JIT, &jit);
2165 ph10 691 if (jit)
2166     fprintf(outfile, "JIT study was successful\n");
2167     else
2168     #ifdef SUPPORT_JIT
2169     fprintf(outfile, "JIT study was not successful\n");
2170 ph10 667 #else
2171 ph10 691 fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2172 ph10 667 #endif
2173 ph10 691 }
2174 nigel 75 }
2175 nigel 3 }
2176    
2177 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2178     that is all. The first 8 bytes of the file are the regex length and then
2179     the study length, in big-endian order. */
2180 nigel 3
2181 nigel 75 if (to_file != NULL)
2182 nigel 3 {
2183 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2184     if (f == NULL)
2185 nigel 3 {
2186 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2187 nigel 3 }
2188 nigel 75 else
2189     {
2190 ph10 756 pcre_uint8 sbuf[8];
2191     sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2192     sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2193     sbuf[2] = (pcre_uint8)((true_size >> 8) & 255);
2194     sbuf[3] = (pcre_uint8)((true_size) & 255);
2195 ph10 259
2196 ph10 756 sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2197     sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2198     sbuf[6] = (pcre_uint8)((true_study_size >> 8) & 255);
2199     sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2200 nigel 3
2201 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2202     fwrite(re, 1, true_size, f) < true_size)
2203     {
2204     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2205     }
2206 nigel 3 else
2207     {
2208 ph10 612 fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2209 ph10 654
2210 ph10 658 /* If there is study data, write it. */
2211 ph10 654
2212 nigel 75 if (extra != NULL)
2213 nigel 3 {
2214 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2215     true_study_size)
2216 nigel 3 {
2217 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2218     strerror(errno));
2219 nigel 3 }
2220 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2221 nigel 3 }
2222     }
2223 nigel 75 fclose(f);
2224 nigel 3 }
2225 nigel 77
2226     new_free(re);
2227 ph10 667 if (extra != NULL) pcre_free_study(extra);
2228 ph10 545 if (locale_set)
2229 ph10 541 {
2230     new_free((void *)tables);
2231     setlocale(LC_CTYPE, "C");
2232 ph10 545 locale_set = 0;
2233     }
2234 nigel 75 continue; /* With next regex */
2235 nigel 3 }
2236 nigel 75 } /* End of non-POSIX compile */
2237 nigel 3
2238     /* Read data lines and test them */
2239    
2240     for (;;)
2241     {
2242 ph10 756 pcre_uint8 *q;
2243     pcre_uint8 *bptr;
2244 nigel 57 int *use_offsets = offsets;
2245 nigel 53 int use_size_offsets = size_offsets;
2246 nigel 63 int callout_data = 0;
2247     int callout_data_set = 0;
2248 nigel 3 int count, c;
2249 nigel 29 int copystrings = 0;
2250 ph10 386 int find_match_limit = default_find_match_limit;
2251 nigel 29 int getstrings = 0;
2252     int getlist = 0;
2253 nigel 39 int gmatched = 0;
2254 nigel 35 int start_offset = 0;
2255 ph10 579 int start_offset_sign = 1;
2256 nigel 41 int g_notempty = 0;
2257 nigel 77 int use_dfa = 0;
2258 nigel 3
2259     options = 0;
2260    
2261 nigel 91 *copynames = 0;
2262     *getnames = 0;
2263    
2264     copynamesptr = copynames;
2265     getnamesptr = getnames;
2266    
2267 nigel 63 pcre_callout = callout;
2268     first_callout = 1;
2269 ph10 654 last_callout_mark = NULL;
2270 nigel 63 callout_extra = 0;
2271     callout_count = 0;
2272     callout_fail_count = 999999;
2273     callout_fail_id = -1;
2274 nigel 73 show_malloc = 0;
2275 nigel 63
2276 nigel 91 if (extra != NULL) extra->flags &=
2277     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2278    
2279     len = 0;
2280     for (;;)
2281 nigel 11 {
2282 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2283 nigel 91 {
2284 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2285     {
2286 ph10 545 fprintf(outfile, "\n");
2287 ph10 537 break;
2288 ph10 545 }
2289 nigel 91 done = 1;
2290     goto CONTINUE;
2291     }
2292     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2293     len = (int)strlen((char *)buffer);
2294     if (buffer[len-1] == '\n') break;
2295 nigel 11 }
2296 nigel 3
2297     while (len > 0 && isspace(buffer[len-1])) len--;
2298     buffer[len] = 0;
2299     if (len == 0) break;
2300    
2301     p = buffer;
2302     while (isspace(*p)) p++;
2303    
2304 ph10 147 bptr = q = dbuffer;
2305 nigel 3 while ((c = *p++) != 0)
2306     {
2307     int i = 0;
2308     int n = 0;
2309 nigel 63
2310 nigel 3 if (c == '\\') switch ((c = *p++))
2311     {
2312     case 'a': c = 7; break;
2313     case 'b': c = '\b'; break;
2314     case 'e': c = 27; break;
2315     case 'f': c = '\f'; break;
2316     case 'n': c = '\n'; break;
2317     case 'r': c = '\r'; break;
2318     case 't': c = '\t'; break;
2319     case 'v': c = '\v'; break;
2320    
2321     case '0': case '1': case '2': case '3':
2322     case '4': case '5': case '6': case '7':
2323     c -= '0';
2324     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2325     c = c * 8 + *p++ - '0';
2326 nigel 91
2327     #if !defined NOUTF8
2328     if (use_utf8 && c > 255)
2329     {
2330     unsigned char buff8[8];
2331     int ii, utn;
2332     utn = ord2utf8(c, buff8);
2333     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2334     c = buff8[ii]; /* Last byte */
2335     }
2336     #endif
2337 nigel 3 break;
2338    
2339     case 'x':
2340 nigel 49
2341     /* Handle \x{..} specially - new Perl thing for utf8 */
2342    
2343 nigel 79 #if !defined NOUTF8
2344 nigel 49 if (*p == '{')
2345     {
2346     unsigned char *pt = p;
2347     c = 0;
2348 ph10 738
2349 ph10 735 /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2350     when isxdigit() is a macro that refers to its argument more than
2351     once. This is banned by the C Standard, but apparently happens in at
2352     least one MacOS environment. */
2353 ph10 738
2354 ph10 735 for (pt++; isxdigit(*pt); pt++)
2355 ph10 734 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2356 nigel 49 if (*pt == '}')
2357     {
2358 nigel 67 unsigned char buff8[8];
2359 nigel 49 int ii, utn;
2360 ph10 355 if (use_utf8)
2361 ph10 358 {
2362 ph10 355 utn = ord2utf8(c, buff8);
2363     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2364     c = buff8[ii]; /* Last byte */
2365     }
2366     else
2367     {
2368 ph10 358 if (c > 255)
2369 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2370     "UTF-8 mode is not enabled.\n"
2371     "** Truncation will probably give the wrong result.\n", c);
2372 ph10 358 }
2373 nigel 49 p = pt + 1;
2374     break;
2375     }
2376     /* Not correct form; fall through */
2377     }
2378 nigel 79 #endif
2379 nigel 49
2380     /* Ordinary \x */
2381    
2382 nigel 3 c = 0;
2383     while (i++ < 2 && isxdigit(*p))
2384     {
2385 ph10 734 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2386 nigel 3 p++;
2387     }
2388     break;
2389    
2390 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2391 nigel 3 p--;
2392     continue;
2393    
2394 nigel 75 case '>':
2395 ph10 579 if (*p == '-')
2396 ph10 567 {
2397     start_offset_sign = -1;
2398     p++;
2399 ph10 579 }
2400 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2401 ph10 579 start_offset *= start_offset_sign;
2402 nigel 75 continue;
2403    
2404 nigel 3 case 'A': /* Option setting */
2405     options |= PCRE_ANCHORED;
2406     continue;
2407    
2408     case 'B':
2409     options |= PCRE_NOTBOL;
2410     continue;
2411    
2412 nigel 29 case 'C':
2413 nigel 63 if (isdigit(*p)) /* Set copy string */
2414     {
2415     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2416     copystrings |= 1 << n;
2417     }
2418     else if (isalnum(*p))
2419     {
2420 ph10 756 pcre_uchar *npp = copynamesptr;
2421 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2422 nigel 91 *npp++ = 0;
2423 nigel 67 *npp = 0;
2424 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2425 nigel 63 if (n < 0)
2426 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2427     copynamesptr = npp;
2428 nigel 63 }
2429     else if (*p == '+')
2430     {
2431     callout_extra = 1;
2432     p++;
2433     }
2434     else if (*p == '-')
2435     {
2436     pcre_callout = NULL;
2437     p++;
2438     }
2439     else if (*p == '!')
2440     {
2441     callout_fail_id = 0;
2442     p++;
2443     while(isdigit(*p))
2444     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2445     callout_fail_count = 0;
2446     if (*p == '!')
2447     {
2448     p++;
2449     while(isdigit(*p))
2450     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2451     }
2452     }
2453     else if (*p == '*')
2454     {
2455     int sign = 1;
2456     callout_data = 0;
2457     if (*(++p) == '-') { sign = -1; p++; }
2458     while(isdigit(*p))
2459     callout_data = callout_data * 10 + *p++ - '0';
2460     callout_data *= sign;
2461     callout_data_set = 1;
2462     }
2463 nigel 29 continue;
2464    
2465 nigel 79 #if !defined NODFA
2466 nigel 77 case 'D':
2467 nigel 79 #if !defined NOPOSIX
2468 nigel 77 if (posix || do_posix)
2469     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2470     else
2471 nigel 79 #endif
2472 nigel 77 use_dfa = 1;
2473     continue;
2474 ph10 553 #endif
2475 nigel 77
2476 ph10 553 #if !defined NODFA
2477 nigel 77 case 'F':
2478     options |= PCRE_DFA_SHORTEST;
2479     continue;
2480 nigel 79 #endif
2481 nigel 77
2482 nigel 29 case 'G':
2483 nigel 63 if (isdigit(*p))
2484     {
2485     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2486     getstrings |= 1 << n;
2487     }
2488     else if (isalnum(*p))
2489     {
2490 ph10 756 pcre_uchar *npp = getnamesptr;
2491 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2492 nigel 91 *npp++ = 0;
2493 nigel 67 *npp = 0;
2494 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2495 nigel 63 if (n < 0)
2496 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2497     getnamesptr = npp;
2498 nigel 63 }
2499 nigel 29 continue;
2500 ph10 691
2501 ph10 667 case 'J':
2502     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2503 ph10 691 if (extra != NULL
2504     && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2505 ph10 667 && extra->executable_jit != NULL)
2506 ph10 691 {
2507 ph10 667 if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2508     jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2509 ph10 675 pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2510 ph10 691 }
2511 ph10 667 continue;
2512 nigel 29
2513     case 'L':
2514     getlist = 1;
2515     continue;
2516    
2517 nigel 63 case 'M':
2518     find_match_limit = 1;
2519     continue;
2520    
2521 nigel 37 case 'N':
2522 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2523     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2524 ph10 461 else
2525 ph10 442 options |= PCRE_NOTEMPTY;
2526 nigel 37 continue;
2527    
2528 nigel 3 case 'O':
2529     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2530 nigel 53 if (n > size_offsets_max)
2531     {
2532     size_offsets_max = n;
2533 nigel 57 free(offsets);
2534 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2535 nigel 53 if (offsets == NULL)
2536     {
2537     printf("** Failed to get %d bytes of memory for offsets vector\n",
2538 ph10 151 (int)(size_offsets_max * sizeof(int)));
2539 nigel 77 yield = 1;
2540     goto EXIT;
2541 nigel 53 }
2542     }
2543     use_size_offsets = n;
2544 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2545 nigel 3 continue;
2546    
2547 nigel 75 case 'P':
2548 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2549 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2550 nigel 75 continue;
2551    
2552 nigel 91 case 'Q':
2553     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2554     if (extra == NULL)
2555     {
2556     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2557     extra->flags = 0;
2558     }
2559     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2560     extra->match_limit_recursion = n;
2561     continue;
2562    
2563     case 'q':
2564     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2565     if (extra == NULL)
2566     {
2567     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2568     extra->flags = 0;
2569     }
2570     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2571     extra->match_limit = n;
2572     continue;
2573    
2574 nigel 79 #if !defined NODFA
2575 nigel 77 case 'R':
2576     options |= PCRE_DFA_RESTART;
2577     continue;
2578 nigel 79 #endif
2579 nigel 77
2580 nigel 73 case 'S':
2581     show_malloc = 1;
2582     continue;
2583 ph10 392
2584 ph10 389 case 'Y':
2585     options |= PCRE_NO_START_OPTIMIZE;
2586 ph10 392 continue;
2587 nigel 73
2588 nigel 3 case 'Z':
2589     options |= PCRE_NOTEOL;
2590     continue;
2591 nigel 71
2592     case '?':
2593     options |= PCRE_NO_UTF8_CHECK;
2594     continue;
2595 nigel 91
2596     case '<':
2597     {
2598     int x = check_newline(p, outfile);
2599     if (x == 0) goto NEXT_DATA;
2600     options |= x;
2601     while (*p++ != '>');
2602     }
2603     continue;
2604 nigel 3 }
2605 nigel 9 *q++ = c;
2606 nigel 3 }
2607 nigel 9 *q = 0;
2608 ph10 530 len = (int)(q - dbuffer);
2609 ph10 545
2610 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2611 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2612 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2613 ph10 371
2614 ph10 363 #if !defined NOPOSIX
2615     if (posix || do_posix)
2616     {
2617     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2618 ph10 371 bptr += buffer_size - len - 1;
2619 ph10 363 }
2620 ph10 371 else
2621     #endif
2622 ph10 363 {
2623     memmove(bptr + buffer_size - len, bptr, len);
2624 ph10 371 bptr += buffer_size - len;
2625     }
2626 nigel 3
2627 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2628     {
2629     printf("**Match limit not relevant for DFA matching: ignored\n");
2630     find_match_limit = 0;
2631     }
2632    
2633 nigel 3 /* Handle matching via the POSIX interface, which does not
2634 nigel 63 support timing or playing with the match limit or callout data. */
2635 nigel 3
2636 nigel 37 #if !defined NOPOSIX
2637 nigel 3 if (posix || do_posix)
2638     {
2639     int rc;
2640     int eflags = 0;
2641 nigel 63 regmatch_t *pmatch = NULL;
2642     if (use_size_offsets > 0)
2643 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2644 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2645     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2646 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2647 nigel 3
2648 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2649 nigel 3
2650     if (rc != 0)
2651     {
2652 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2653 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2654     }
2655 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2656     != 0)
2657     {
2658     fprintf(outfile, "Matched with REG_NOSUB\n");
2659     }
2660 nigel 3 else
2661     {
2662 nigel 7 size_t i;
2663 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2664 nigel 3 {
2665     if (pmatch[i].rm_so >= 0)
2666     {
2667 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2668 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2669     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2670 nigel 3 fprintf(outfile, "\n");
2671 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2672 nigel 35 {
2673 ph10 616 fprintf(outfile, "%2d+ ", (int)i);
2674 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2675     outfile);
2676 nigel 35 fprintf(outfile, "\n");
2677     }
2678 nigel 3 }
2679     }
2680     }
2681 nigel 53 free(pmatch);
2682 nigel 3 }
2683    
2684 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2685 nigel 3
2686 nigel 37 else
2687     #endif /* !defined NOPOSIX */
2688    
2689 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2690 nigel 3 {
2691 ph10 512 markptr = NULL;
2692    
2693 nigel 93 if (timeitm > 0)
2694 nigel 3 {
2695     register int i;
2696     clock_t time_taken;
2697     clock_t start_time = clock();
2698 nigel 77
2699 nigel 79 #if !defined NODFA
2700 nigel 77 if (all_use_dfa || use_dfa)
2701     {
2702     int workspace[1000];
2703 nigel 93 for (i = 0; i < timeitm; i++)
2704 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2705 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2706     sizeof(workspace)/sizeof(int));
2707     }
2708     else
2709 nigel 79 #endif
2710 nigel 77
2711 nigel 93 for (i = 0; i < timeitm; i++)
2712 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2713 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2714 nigel 77
2715 nigel 3 time_taken = clock() - start_time;
2716 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2717     (((double)time_taken * 1000.0) / (double)timeitm) /
2718 nigel 63 (double)CLOCKS_PER_SEC);
2719 nigel 3 }
2720    
2721 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2722 nigel 87 varying limits in order to find the minimum value for the match limit and
2723 ph10 667 for the recursion limit. The match limits are relevant only to the normal
2724     running of pcre_exec(), so disable the JIT optimization. This makes it
2725     possible to run the same set of tests with and without JIT externally
2726     requested. */
2727 nigel 63
2728     if (find_match_limit)
2729     {
2730     if (extra == NULL)
2731     {
2732 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2733 nigel 63 extra->flags = 0;
2734     }
2735 ph10 667 else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
2736 ph10 691
2737 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2738 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2739     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2740     PCRE_ERROR_MATCHLIMIT, "match()");
2741 nigel 63
2742 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2743     options|g_notempty, use_offsets, use_size_offsets,
2744     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2745     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2746 nigel 63 }
2747    
2748     /* If callout_data is set, use the interface with additional data */
2749    
2750     else if (callout_data_set)
2751     {
2752     if (extra == NULL)
2753     {
2754 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2755 nigel 63 extra->flags = 0;
2756     }
2757     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2758 nigel 71 extra->callout_data = &callout_data;
2759 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2760     options | g_notempty, use_offsets, use_size_offsets);
2761     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2762     }
2763    
2764     /* The normal case is just to do the match once, with the default
2765     value of match_limit. */
2766    
2767 nigel 79 #if !defined NODFA
2768 nigel 77 else if (all_use_dfa || use_dfa)
2769     {
2770     int workspace[1000];
2771 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2772 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2773     sizeof(workspace)/sizeof(int));
2774     if (count == 0)
2775     {
2776     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2777     count = use_size_offsets/2;
2778     }
2779     }
2780 nigel 79 #endif
2781 nigel 77
2782 nigel 75 else
2783     {
2784     count = pcre_exec(re, extra, (char *)bptr, len,
2785     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2786 nigel 77 if (count == 0)
2787     {
2788     fprintf(outfile, "Matched, but too many substrings\n");
2789     count = use_size_offsets/3;
2790     }
2791 nigel 75 }
2792 nigel 3
2793 nigel 39 /* Matched */
2794    
2795 nigel 3 if (count >= 0)
2796     {
2797 nigel 93 int i, maxcount;
2798    
2799     #if !defined NODFA
2800     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2801     #endif
2802     maxcount = use_size_offsets/3;
2803    
2804     /* This is a check against a lunatic return value. */
2805    
2806     if (count > maxcount)
2807     {
2808     fprintf(outfile,
2809     "** PCRE error: returned count %d is too big for offset size %d\n",
2810     count, use_size_offsets);
2811     count = use_size_offsets/3;
2812     if (do_g || do_G)
2813     {
2814     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2815     do_g = do_G = FALSE; /* Break g/G loop */
2816     }
2817     }
2818 ph10 654
2819 ph10 626 /* do_allcaps requests showing of all captures in the pattern, to check
2820     unset ones at the end. */
2821 ph10 654
2822 ph10 626 if (do_allcaps)
2823     {
2824     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2825 ph10 654 count++; /* Allow for full match */
2826     if (count * 2 > use_size_offsets) count = use_size_offsets/2;
2827     }
2828 nigel 93
2829 ph10 626 /* Output the captured substrings */
2830 ph10 654
2831 nigel 29 for (i = 0; i < count * 2; i += 2)
2832 nigel 3 {
2833 nigel 57 if (use_offsets[i] < 0)
2834 ph10 654 {
2835 ph10 626 if (use_offsets[i] != -1)
2836     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2837 ph10 654 use_offsets[i], i);
2838 ph10 626 if (use_offsets[i+1] != -1)
2839     fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
2840 ph10 654 use_offsets[i+1], i+1);
2841 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2842 ph10 654 }
2843 nigel 3 else
2844     {
2845     fprintf(outfile, "%2d: ", i/2);
2846 nigel 63 (void)pchars(bptr + use_offsets[i],
2847     use_offsets[i+1] - use_offsets[i], outfile);
2848 nigel 3 fprintf(outfile, "\n");
2849 ph10 616 if (do_showcaprest || (i == 0 && do_showrest))
2850 nigel 35 {
2851 ph10 616 fprintf(outfile, "%2d+ ", i/2);
2852     (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2853     outfile);
2854     fprintf(outfile, "\n");
2855 nigel 35 }
2856 nigel 3 }
2857     }
2858 ph10 512
2859 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2860 nigel 29
2861     for (i = 0; i < 32; i++)
2862     {
2863     if ((copystrings & (1 << i)) != 0)
2864     {
2865 nigel 91 char copybuffer[256];
2866 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2867 nigel 37 i, copybuffer, sizeof(copybuffer));
2868 nigel 29 if (rc < 0)
2869     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2870     else
2871 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2872 nigel 29 }
2873     }
2874    
2875 nigel 91 for (copynamesptr = copynames;
2876     *copynamesptr != 0;
2877     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2878     {
2879     char copybuffer[256];
2880     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2881     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2882     if (rc < 0)
2883     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2884     else
2885     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2886     }
2887    
2888 nigel 29 for (i = 0; i < 32; i++)
2889     {
2890     if ((getstrings & (1 << i)) != 0)
2891     {
2892     const char *substring;
2893 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2894 nigel 29 i, &substring);
2895     if (rc < 0)
2896     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2897     else
2898     {
2899     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2900 nigel 49 pcre_free_substring(substring);
2901 nigel 29 }
2902     }
2903     }
2904    
2905 nigel 91 for (getnamesptr = getnames;
2906     *getnamesptr != 0;
2907     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2908     {
2909     const char *substring;
2910     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2911     count, (char *)getnamesptr, &substring);
2912     if (rc < 0)
2913     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2914     else
2915     {
2916     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2917     pcre_free_substring(substring);
2918     }
2919     }
2920    
2921 nigel 29 if (getlist)
2922     {
2923     const char **stringlist;
2924 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2925 nigel 29 &stringlist);
2926     if (rc < 0)
2927     fprintf(outfile, "get substring list failed %d\n", rc);
2928     else
2929     {
2930     for (i = 0; i < count; i++)
2931     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2932     if (stringlist[i] != NULL)
2933     fprintf(outfile, "string list not terminated by NULL\n");
2934 nigel 49 pcre_free_substring_list(stringlist);
2935 nigel 29 }
2936     }
2937 nigel 39 }
2938 nigel 29
2939 nigel 75 /* There was a partial match */
2940    
2941     else if (count == PCRE_ERROR_PARTIAL)
2942     {
2943 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2944     else fprintf(outfile, "Partial match, mark=%s", markptr);
2945 ph10 426 if (use_size_offsets > 1)
2946     {
2947     fprintf(outfile, ": ");
2948     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2949 ph10 461 outfile);
2950     }
2951 nigel 77 fprintf(outfile, "\n");
2952 nigel 75 break; /* Out of the /g loop */
2953     }
2954    
2955 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2956 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2957     to advance the start offset, and continue. We won't be at the end of the
2958     string - that was checked before setting g_notempty.
2959 nigel 39
2960 ph10 566 Complication arises in the case when the newline convention is "any",
2961 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2962     terminated by CRLF, an advance of one character just passes the \r,
2963 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2964 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2965     newline setting in the pattern; if none was set, use pcre_config() to
2966 ph10 566 find the default.
2967 ph10 144
2968 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2969     character, not one byte. */
2970    
2971 nigel 3 else
2972     {
2973 nigel 41 if (g_notempty != 0)
2974 nigel 35 {
2975 nigel 73 int onechar = 1;
2976 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2977 nigel 57 use_offsets[0] = start_offset;
2978 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2979     {
2980     int d;
2981     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2982 ph10 391 /* Note that these values are always the ASCII ones, even in
2983     EBCDIC environments. CR = 13, NL = 10. */
2984     obits = (d == 13)? PCRE_NEWLINE_CR :
2985     (d == 10)? PCRE_NEWLINE_LF :
2986     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2987 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2988 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2989     }
2990 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2991 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2992 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2993 ph10 149 &&
2994 ph10 143 start_offset < len - 1 &&
2995     bptr[start_offset] == '\r' &&
2996     bptr[start_offset+1] == '\n')
2997 ph10 144 onechar++;
2998 ph10 143 else if (use_utf8)
2999 nigel 73 {
3000     while (start_offset + onechar < len)
3001     {
3002 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3003 ph10 579 onechar++;
3004 nigel 73 }
3005     }
3006     use_offsets[1] = start_offset + onechar;
3007 nigel 35 }
3008 nigel 41 else
3009     {
3010 ph10 598 switch(count)
3011 ph10 654 {
3012 ph10 598 case PCRE_ERROR_NOMATCH:
3013 ph10 512 if (gmatched == 0)
3014 ph10 510 {
3015     if (markptr == NULL) fprintf(outfile, "No match\n");
3016     else fprintf(outfile, "No match, mark = %s\n", markptr);
3017 ph10 512 }
3018 ph10 598 break;
3019 ph10 654
3020 ph10 598 case PCRE_ERROR_BADUTF8:
3021     case PCRE_ERROR_SHORTUTF8:
3022     fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3023     (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3024     if (use_size_offsets >= 2)
3025 ph10 654 fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3026 ph10 598 use_offsets[1]);
3027 ph10 654 fprintf(outfile, "\n");
3028     break;
3029    
3030 ph10 598 default:
3031 ph10 654 if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3032 ph10 604 fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3033 ph10 654 else
3034     fprintf(outfile, "Error %d (Unexpected value)\n", count);
3035 ph10 598 break;
3036 nigel 41 }
3037 ph10 654
3038 nigel 41 break; /* Out of the /g loop */
3039     }
3040 nigel 3 }
3041 nigel 35
3042 nigel 39 /* If not /g or /G we are done */
3043    
3044     if (!do_g && !do_G) break;
3045    
3046 nigel 41 /* If we have matched an empty string, first check to see if we are at
3047 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3048     Perl's /g option does. This turns out to be rather cunning. First we set
3049     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3050 nigel 47 same point. If this fails (picked up above) we advance to the next
3051 ph10 143 character. */
3052 ph10 142
3053 nigel 41 g_notempty = 0;
3054 ph10 142
3055 nigel 57 if (use_offsets[0] == use_offsets[1])
3056 nigel 41 {
3057 nigel 57 if (use_offsets[0] == len) break;
3058 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3059 nigel 41 }
3060 nigel 39
3061     /* For /g, update the start offset, leaving the rest alone */
3062    
3063 ph10 143 if (do_g) start_offset = use_offsets[1];
3064 nigel 39
3065     /* For /G, update the pointer and length */
3066    
3067     else
3068 nigel 35 {
3069 ph10 143 bptr += use_offsets[1];
3070     len -= use_offsets[1];
3071 nigel 35 }
3072 nigel 39 } /* End of loop for /g and /G */
3073 nigel 91
3074     NEXT_DATA: continue;
3075 nigel 39 } /* End of loop for data lines */
3076 nigel 3
3077 nigel 11 CONTINUE:
3078 nigel 37
3079     #if !defined NOPOSIX
3080 nigel 3 if (posix || do_posix) regfree(&preg);
3081 nigel 37 #endif
3082    
3083 nigel 77 if (re != NULL) new_free(re);
3084 ph10 667 if (extra != NULL) pcre_free_study(extra);
3085 ph10 541 if (locale_set)
3086 nigel 25 {
3087 nigel 77 new_free((void *)tables);
3088 nigel 25 setlocale(LC_CTYPE, "C");
3089 nigel 93 locale_set = 0;
3090 nigel 25 }
3091 ph10 691 if (jit_stack != NULL)
3092 ph10 667 {
3093     pcre_jit_stack_free(jit_stack);
3094 ph10 691 jit_stack = NULL;
3095     }
3096 nigel 3 }
3097    
3098 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
3099 nigel 77
3100     EXIT:
3101    
3102     if (infile != NULL && infile != stdin) fclose(infile);
3103     if (outfile != NULL && outfile != stdout) fclose(outfile);
3104    
3105     free(buffer);
3106     free(dbuffer);
3107     free(pbuffer);
3108     free(offsets);
3109    
3110     return yield;
3111 nigel 3 }
3112    
3113 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12