/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 580 - (hide annotations) (download)
Fri Nov 26 11:16:43 2010 UTC (3 years, 7 months ago) by ph10
File MIME type: text/plain
File size: 87639 byte(s)
Patches to avoid build problems in some Borland environments.

1 nigel 3 /*************************************************
2     * PCRE testing program *
3     *************************************************/
4    
5 nigel 63 /* This program was hacked up as a tester for PCRE. I really should have
6     written it more tidily in the first place. Will I ever learn? It has grown and
7 nigel 77 been extended and consequently is now rather, er, *very* untidy in places.
8 nigel 63
9 nigel 75 -----------------------------------------------------------------------------
10     Redistribution and use in source and binary forms, with or without
11     modification, are permitted provided that the following conditions are met:
12    
13     * Redistributions of source code must retain the above copyright notice,
14     this list of conditions and the following disclaimer.
15    
16     * Redistributions in binary form must reproduce the above copyright
17     notice, this list of conditions and the following disclaimer in the
18     documentation and/or other materials provided with the distribution.
19    
20     * Neither the name of the University of Cambridge nor the names of its
21     contributors may be used to endorse or promote products derived from
22     this software without specific prior written permission.
23    
24     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25     AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26     IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27     ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28     LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29     CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30     SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31     INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32     CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33     ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34     POSSIBILITY OF SUCH DAMAGE.
35     -----------------------------------------------------------------------------
36     */
37    
38    
39 ph10 200 #ifdef HAVE_CONFIG_H
40 ph10 236 #include "config.h"
41 ph10 200 #endif
42 ph10 199
43 nigel 3 #include <ctype.h>
44     #include <stdio.h>
45     #include <string.h>
46     #include <stdlib.h>
47     #include <time.h>
48 nigel 25 #include <locale.h>
49 nigel 75 #include <errno.h>
50 nigel 3
51 ph10 287 #ifdef SUPPORT_LIBREADLINE
52 ph10 343 #ifdef HAVE_UNISTD_H
53 ph10 287 #include <unistd.h>
54 ph10 343 #endif
55 ph10 287 #include <readline/readline.h>
56     #include <readline/history.h>
57     #endif
58 nigel 93
59 ph10 287
60 nigel 93 /* A number of things vary for Windows builds. Originally, pcretest opened its
61     input and output without "b"; then I was told that "b" was needed in some
62     environments, so it was added for release 5.0 to both the input and output. (It
63     makes no difference on Unix-like systems.) Later I was told that it is wrong
64     for the input on Windows. I've now abstracted the modes into two macros that
65     are set here, to make it easier to fiddle with them, and removed "b" from the
66     input mode under Windows. */
67    
68     #if defined(_WIN32) || defined(WIN32)
69     #include <io.h> /* For _setmode() */
70     #include <fcntl.h> /* For _O_BINARY */
71     #define INPUT_MODE "r"
72     #define OUTPUT_MODE "wb"
73    
74 ph10 411 #ifndef isatty
75     #define isatty _isatty /* This is what Windows calls them, I'm told, */
76     #endif /* though in some environments they seem to */
77     /* be already defined, hence the #ifndefs. */
78     #ifndef fileno
79 ph10 343 #define fileno _fileno
80 ph10 411 #endif
81 ph10 343
82 ph10 580 /* A user sent this fix for Borland Builder 5 under Windows. */
83    
84     #ifdef __BORLANDC__
85     #define _setmode(handle, mode) setmode(handle, mode)
86     #endif
87    
88     /* Not Windows */
89    
90 nigel 93 #else
91     #include <sys/time.h> /* These two includes are needed */
92     #include <sys/resource.h> /* for setrlimit(). */
93     #define INPUT_MODE "rb"
94     #define OUTPUT_MODE "wb"
95 nigel 91 #endif
96    
97 nigel 93
98 ph10 145 /* We have to include pcre_internal.h because we need the internal info for
99     displaying the results of pcre_study() and we also need to know about the
100     internal macros, structures, and other internal data values; pcretest has
101     "inside information" compared to a program that strictly follows the PCRE API.
102 nigel 37
103 ph10 145 Although pcre_internal.h does itself include pcre.h, we explicitly include it
104     here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
105     appropriately for an application, not for building PCRE. */
106 nigel 77
107 ph10 145 #include "pcre.h"
108 nigel 77 #include "pcre_internal.h"
109    
110 ph10 351 /* We need access to some of the data tables that PCRE uses. So as not to have
111     to keep two copies, we include the source file here, changing the names of the
112     external symbols to prevent clashes. */
113 nigel 77
114 ph10 351 #define _pcre_ucp_gentype ucp_gentype
115 nigel 85 #define _pcre_utf8_table1 utf8_table1
116     #define _pcre_utf8_table1_size utf8_table1_size
117     #define _pcre_utf8_table2 utf8_table2
118     #define _pcre_utf8_table3 utf8_table3
119     #define _pcre_utf8_table4 utf8_table4
120     #define _pcre_utt utt
121     #define _pcre_utt_size utt_size
122 ph10 240 #define _pcre_utt_names utt_names
123 nigel 85 #define _pcre_OP_lengths OP_lengths
124    
125     #include "pcre_tables.c"
126    
127     /* We also need the pcre_printint() function for printing out compiled
128     patterns. This function is in a separate file so that it can be included in
129 ph10 507 pcre_compile.c when that module is compiled with debugging enabled. It needs to
130 ph10 498 know which case is being compiled. */
131 nigel 85
132 ph10 498 #define COMPILING_PCRETEST
133     #include "pcre_printint.src"
134    
135     /* The definition of the macro PRINTABLE, which determines whether to print an
136 nigel 93 output character as-is or as a hex value when showing compiled patterns, is
137 ph10 498 contained in the printint.src file. We use it here also, in cases when the
138     locale has not been explicitly changed, so as to get consistent output from
139     systems that differ in their output from isprint() even in the "C" locale. */
140 nigel 93
141     #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
142 nigel 85
143 nigel 37 /* It is possible to compile this test program without including support for
144     testing the POSIX interface, though this is not available via the standard
145     Makefile. */
146    
147     #if !defined NOPOSIX
148 nigel 3 #include "pcreposix.h"
149 nigel 37 #endif
150 nigel 3
151 ph10 107 /* It is also possible, for the benefit of the version currently imported into
152     Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
153     interface to the DFA matcher (NODFA), and without the doublecheck of the old
154     "info" function (define NOINFOCHECK). In fact, we automatically cut out the
155     UTF8 support if PCRE is built without it. */
156 nigel 79
157 ph10 107 #ifndef SUPPORT_UTF8
158     #ifndef NOUTF8
159     #define NOUTF8
160     #endif
161     #endif
162 nigel 79
163 ph10 107
164 nigel 85 /* Other parameters */
165    
166 nigel 3 #ifndef CLOCKS_PER_SEC
167     #ifdef CLK_TCK
168     #define CLOCKS_PER_SEC CLK_TCK
169     #else
170     #define CLOCKS_PER_SEC 100
171     #endif
172     #endif
173    
174 nigel 93 /* This is the default loop count for timing. */
175    
176 nigel 75 #define LOOPREPEAT 500000
177 nigel 3
178 nigel 85 /* Static variables */
179    
180 nigel 3 static FILE *outfile;
181     static int log_store = 0;
182 nigel 63 static int callout_count;
183     static int callout_extra;
184     static int callout_fail_count;
185     static int callout_fail_id;
186 ph10 210 static int debug_lengths;
187 nigel 63 static int first_callout;
188 nigel 93 static int locale_set = 0;
189 nigel 73 static int show_malloc;
190 nigel 67 static int use_utf8;
191 nigel 43 static size_t gotten_store;
192 nigel 3
193 nigel 91 /* The buffers grow automatically if very long input lines are encountered. */
194    
195     static int buffer_size = 50000;
196     static uschar *buffer = NULL;
197     static uschar *dbuffer = NULL;
198 nigel 75 static uschar *pbuffer = NULL;
199 nigel 3
200 nigel 75
201 ph10 541 /*************************************************
202     * Alternate character tables *
203     *************************************************/
204 nigel 49
205 ph10 545 /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
206     using the default tables of the library. However, the T option can be used to
207     select alternate sets of tables, for different kinds of testing. Note also that
208 ph10 541 the L (locale) option also adjusts the tables. */
209    
210 ph10 545 /* This is the set of tables distributed as default with PCRE. It recognizes
211 ph10 541 only ASCII characters. */
212    
213     static const unsigned char tables0[] = {
214    
215     /* This table is a lower casing table. */
216    
217     0, 1, 2, 3, 4, 5, 6, 7,
218     8, 9, 10, 11, 12, 13, 14, 15,
219     16, 17, 18, 19, 20, 21, 22, 23,
220     24, 25, 26, 27, 28, 29, 30, 31,
221     32, 33, 34, 35, 36, 37, 38, 39,
222     40, 41, 42, 43, 44, 45, 46, 47,
223     48, 49, 50, 51, 52, 53, 54, 55,
224     56, 57, 58, 59, 60, 61, 62, 63,
225     64, 97, 98, 99,100,101,102,103,
226     104,105,106,107,108,109,110,111,
227     112,113,114,115,116,117,118,119,
228     120,121,122, 91, 92, 93, 94, 95,
229     96, 97, 98, 99,100,101,102,103,
230     104,105,106,107,108,109,110,111,
231     112,113,114,115,116,117,118,119,
232     120,121,122,123,124,125,126,127,
233     128,129,130,131,132,133,134,135,
234     136,137,138,139,140,141,142,143,
235     144,145,146,147,148,149,150,151,
236     152,153,154,155,156,157,158,159,
237     160,161,162,163,164,165,166,167,
238     168,169,170,171,172,173,174,175,
239     176,177,178,179,180,181,182,183,
240     184,185,186,187,188,189,190,191,
241     192,193,194,195,196,197,198,199,
242     200,201,202,203,204,205,206,207,
243     208,209,210,211,212,213,214,215,
244     216,217,218,219,220,221,222,223,
245     224,225,226,227,228,229,230,231,
246     232,233,234,235,236,237,238,239,
247     240,241,242,243,244,245,246,247,
248     248,249,250,251,252,253,254,255,
249    
250     /* This table is a case flipping table. */
251    
252     0, 1, 2, 3, 4, 5, 6, 7,
253     8, 9, 10, 11, 12, 13, 14, 15,
254     16, 17, 18, 19, 20, 21, 22, 23,
255     24, 25, 26, 27, 28, 29, 30, 31,
256     32, 33, 34, 35, 36, 37, 38, 39,
257     40, 41, 42, 43, 44, 45, 46, 47,
258     48, 49, 50, 51, 52, 53, 54, 55,
259     56, 57, 58, 59, 60, 61, 62, 63,
260     64, 97, 98, 99,100,101,102,103,
261     104,105,106,107,108,109,110,111,
262     112,113,114,115,116,117,118,119,
263     120,121,122, 91, 92, 93, 94, 95,
264     96, 65, 66, 67, 68, 69, 70, 71,
265     72, 73, 74, 75, 76, 77, 78, 79,
266     80, 81, 82, 83, 84, 85, 86, 87,
267     88, 89, 90,123,124,125,126,127,
268     128,129,130,131,132,133,134,135,
269     136,137,138,139,140,141,142,143,
270     144,145,146,147,148,149,150,151,
271     152,153,154,155,156,157,158,159,
272     160,161,162,163,164,165,166,167,
273     168,169,170,171,172,173,174,175,
274     176,177,178,179,180,181,182,183,
275     184,185,186,187,188,189,190,191,
276     192,193,194,195,196,197,198,199,
277     200,201,202,203,204,205,206,207,
278     208,209,210,211,212,213,214,215,
279     216,217,218,219,220,221,222,223,
280     224,225,226,227,228,229,230,231,
281     232,233,234,235,236,237,238,239,
282     240,241,242,243,244,245,246,247,
283     248,249,250,251,252,253,254,255,
284    
285     /* This table contains bit maps for various character classes. Each map is 32
286     bytes long and the bits run from the least significant end of each byte. The
287     classes that have their own maps are: space, xdigit, digit, upper, lower, word,
288     graph, print, punct, and cntrl. Other classes are built from combinations. */
289    
290     0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
291     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
292     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
293     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294    
295     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
296     0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
297     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
299    
300     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
301     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
302     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
304    
305     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306     0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
307     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
308     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
309    
310     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
312     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
313     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
314    
315     0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
316     0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
317     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
318     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
319    
320     0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
321     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
322     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
323     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
324    
325     0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
326     0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
327     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
328     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
329    
330     0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
331     0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
332     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
333     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
334    
335     0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
336     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
337     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
338     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
339    
340     /* This table identifies various classes of character by individual bits:
341     0x01 white space character
342     0x02 letter
343     0x04 decimal digit
344     0x08 hexadecimal digit
345     0x10 alphanumeric or '_'
346     0x80 regular expression metacharacter or binary zero
347     */
348    
349     0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
350     0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */
351     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
352     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
353     0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
354     0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
355     0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
356     0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
357     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
358     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
359     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
360     0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
361     0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
362     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
363     0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
364     0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
365     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
366     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
367     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
368     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
369     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
370     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
371     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
372     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
373     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
374     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
375     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
376     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
377     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
378     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
379     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
380     0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
381    
382 ph10 545 /* This is a set of tables that came originally from a Windows user. It seems to
383     be at least an approximation of ISO 8859. In particular, there are characters
384 ph10 541 greater than 128 that are marked as spaces, letters, etc. */
385    
386     static const unsigned char tables1[] = {
387     0,1,2,3,4,5,6,7,
388     8,9,10,11,12,13,14,15,
389     16,17,18,19,20,21,22,23,
390     24,25,26,27,28,29,30,31,
391     32,33,34,35,36,37,38,39,
392     40,41,42,43,44,45,46,47,
393     48,49,50,51,52,53,54,55,
394     56,57,58,59,60,61,62,63,
395     64,97,98,99,100,101,102,103,
396     104,105,106,107,108,109,110,111,
397     112,113,114,115,116,117,118,119,
398     120,121,122,91,92,93,94,95,
399     96,97,98,99,100,101,102,103,
400     104,105,106,107,108,109,110,111,
401     112,113,114,115,116,117,118,119,
402     120,121,122,123,124,125,126,127,
403     128,129,130,131,132,133,134,135,
404     136,137,138,139,140,141,142,143,
405     144,145,146,147,148,149,150,151,
406     152,153,154,155,156,157,158,159,
407     160,161,162,163,164,165,166,167,
408     168,169,170,171,172,173,174,175,
409     176,177,178,179,180,181,182,183,
410     184,185,186,187,188,189,190,191,
411     224,225,226,227,228,229,230,231,
412     232,233,234,235,236,237,238,239,
413     240,241,242,243,244,245,246,215,
414     248,249,250,251,252,253,254,223,
415     224,225,226,227,228,229,230,231,
416     232,233,234,235,236,237,238,239,
417     240,241,242,243,244,245,246,247,
418     248,249,250,251,252,253,254,255,
419     0,1,2,3,4,5,6,7,
420     8,9,10,11,12,13,14,15,
421     16,17,18,19,20,21,22,23,
422     24,25,26,27,28,29,30,31,
423     32,33,34,35,36,37,38,39,
424     40,41,42,43,44,45,46,47,
425     48,49,50,51,52,53,54,55,
426     56,57,58,59,60,61,62,63,
427     64,97,98,99,100,101,102,103,
428     104,105,106,107,108,109,110,111,
429     112,113,114,115,116,117,118,119,
430     120,121,122,91,92,93,94,95,
431     96,65,66,67,68,69,70,71,
432     72,73,74,75,76,77,78,79,
433     80,81,82,83,84,85,86,87,
434     88,89,90,123,124,125,126,127,
435     128,129,130,131,132,133,134,135,
436     136,137,138,139,140,141,142,143,
437     144,145,146,147,148,149,150,151,
438     152,153,154,155,156,157,158,159,
439     160,161,162,163,164,165,166,167,
440     168,169,170,171,172,173,174,175,
441     176,177,178,179,180,181,182,183,
442     184,185,186,187,188,189,190,191,
443     224,225,226,227,228,229,230,231,
444     232,233,234,235,236,237,238,239,
445     240,241,242,243,244,245,246,215,
446     248,249,250,251,252,253,254,223,
447     192,193,194,195,196,197,198,199,
448     200,201,202,203,204,205,206,207,
449     208,209,210,211,212,213,214,247,
450     216,217,218,219,220,221,222,255,
451     0,62,0,0,1,0,0,0,
452     0,0,0,0,0,0,0,0,
453     32,0,0,0,1,0,0,0,
454     0,0,0,0,0,0,0,0,
455     0,0,0,0,0,0,255,3,
456     126,0,0,0,126,0,0,0,
457     0,0,0,0,0,0,0,0,
458     0,0,0,0,0,0,0,0,
459     0,0,0,0,0,0,255,3,
460     0,0,0,0,0,0,0,0,
461     0,0,0,0,0,0,12,2,
462     0,0,0,0,0,0,0,0,
463     0,0,0,0,0,0,0,0,
464     254,255,255,7,0,0,0,0,
465     0,0,0,0,0,0,0,0,
466     255,255,127,127,0,0,0,0,
467     0,0,0,0,0,0,0,0,
468     0,0,0,0,254,255,255,7,
469     0,0,0,0,0,4,32,4,
470     0,0,0,128,255,255,127,255,
471     0,0,0,0,0,0,255,3,
472     254,255,255,135,254,255,255,7,
473     0,0,0,0,0,4,44,6,
474     255,255,127,255,255,255,127,255,
475     0,0,0,0,254,255,255,255,
476     255,255,255,255,255,255,255,127,
477     0,0,0,0,254,255,255,255,
478     255,255,255,255,255,255,255,255,
479     0,2,0,0,255,255,255,255,
480     255,255,255,255,255,255,255,127,
481     0,0,0,0,255,255,255,255,
482     255,255,255,255,255,255,255,255,
483     0,0,0,0,254,255,0,252,
484     1,0,0,248,1,0,0,120,
485     0,0,0,0,254,255,255,255,
486     0,0,128,0,0,0,128,0,
487     255,255,255,255,0,0,0,0,
488     0,0,0,0,0,0,0,128,
489     255,255,255,255,0,0,0,0,
490     0,0,0,0,0,0,0,0,
491     128,0,0,0,0,0,0,0,
492     0,1,1,0,1,1,0,0,
493     0,0,0,0,0,0,0,0,
494     0,0,0,0,0,0,0,0,
495     1,0,0,0,128,0,0,0,
496     128,128,128,128,0,0,128,0,
497     28,28,28,28,28,28,28,28,
498     28,28,0,0,0,0,0,128,
499     0,26,26,26,26,26,26,18,
500     18,18,18,18,18,18,18,18,
501     18,18,18,18,18,18,18,18,
502     18,18,18,128,128,0,128,16,
503     0,26,26,26,26,26,26,18,
504     18,18,18,18,18,18,18,18,
505     18,18,18,18,18,18,18,18,
506     18,18,18,128,128,0,0,0,
507     0,0,0,0,0,1,0,0,
508     0,0,0,0,0,0,0,0,
509     0,0,0,0,0,0,0,0,
510     0,0,0,0,0,0,0,0,
511     1,0,0,0,0,0,0,0,
512     0,0,18,0,0,0,0,0,
513     0,0,20,20,0,18,0,0,
514     0,20,18,0,0,0,0,0,
515     18,18,18,18,18,18,18,18,
516     18,18,18,18,18,18,18,18,
517     18,18,18,18,18,18,18,0,
518     18,18,18,18,18,18,18,18,
519     18,18,18,18,18,18,18,18,
520     18,18,18,18,18,18,18,18,
521     18,18,18,18,18,18,18,0,
522     18,18,18,18,18,18,18,18
523     };
524    
525    
526    
527 ph10 558
528     #ifndef HAVE_STRERROR
529 nigel 49 /*************************************************
530 ph10 558 * Provide strerror() for non-ANSI libraries *
531     *************************************************/
532    
/* Fallback strerror() for old C libraries (SunOS4 is one example) that lack
it but do export the traditional sys_nerr / sys_errlist[] pair. This is only
compiled when HAVE_STRERROR is not defined.

Argument:
  n          an errno value

Returns:     sys_errlist[n] when n is a valid index (0 <= n < sys_nerr),
             otherwise the fixed text "unknown error number"
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
546     #endif /* HAVE_STRERROR */
547    
548    
549    
550    
551     /*************************************************
552 nigel 91 * Read or extend an input line *
553     *************************************************/
554    
555     /* Input lines are read into buffer, but both patterns and data lines can be
556     continued over multiple input lines. In addition, if the buffer fills up, we
557     want to automatically expand it so as to be able to handle extremely large
558     lines that are needed for certain stress tests. When the input buffer is
559     expanded, the other two buffers must also be expanded likewise, and the
560     contents of pbuffer, which are a copy of the input for callouts, must be
561     preserved (for when expansion happens for a data line). This is not the most
562     optimal way of handling this, but hey, this is just a test program!
563    
564     Arguments:
565     f the file to read
566     start where in buffer to start (this *must* be within buffer)
567 ph10 287 prompt for stdin or readline()
568 nigel 91
569     Returns: pointer to the start of new data
570     could be a copy of start, or could be moved
571     NULL if no data read and EOF reached
572     */
573    
574     static uschar *
575 ph10 287 extend_inputline(FILE *f, uschar *start, const char *prompt)
576 nigel 91 {
577     uschar *here = start;
578    
579     for (;;)
580     {
581 ph10 530 int rlen = (int)(buffer_size - (here - buffer));
582 nigel 93
583 nigel 91 if (rlen > 1000)
584     {
585     int dlen;
586 ph10 289
587 ph10 287 /* If libreadline support is required, use readline() to read a line if the
588     input is a terminal. Note that readline() removes the trailing newline, so
589     we must put it back again, to be compatible with fgets(). */
590 ph10 289
591 ph10 287 #ifdef SUPPORT_LIBREADLINE
592     if (isatty(fileno(f)))
593     {
594 ph10 289 size_t len;
595 ph10 287 char *s = readline(prompt);
596     if (s == NULL) return (here == start)? NULL : start;
597     len = strlen(s);
598 ph10 289 if (len > 0) add_history(s);
599 ph10 287 if (len > rlen - 1) len = rlen - 1;
600     memcpy(here, s, len);
601     here[len] = '\n';
602 ph10 289 here[len+1] = 0;
603     free(s);
604 ph10 287 }
605 ph10 289 else
606     #endif
607    
608 ph10 287 /* Read the next line by normal means, prompting if the file is stdin. */
609 ph10 289
610 ph10 287 {
611 ph10 516 if (f == stdin) printf("%s", prompt);
612 ph10 287 if (fgets((char *)here, rlen, f) == NULL)
613     return (here == start)? NULL : start;
614 ph10 289 }
615    
616 nigel 91 dlen = (int)strlen((char *)here);
617     if (dlen > 0 && here[dlen - 1] == '\n') return start;
618     here += dlen;
619     }
620    
621     else
622     {
623     int new_buffer_size = 2*buffer_size;
624     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
625     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
626     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
627    
628     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
629     {
630     fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
631     exit(1);
632     }
633    
634     memcpy(new_buffer, buffer, buffer_size);
635     memcpy(new_pbuffer, pbuffer, buffer_size);
636    
637     buffer_size = new_buffer_size;
638    
639     start = new_buffer + (start - buffer);
640     here = new_buffer + (here - buffer);
641    
642     free(buffer);
643     free(dbuffer);
644     free(pbuffer);
645    
646     buffer = new_buffer;
647     dbuffer = new_dbuffer;
648     pbuffer = new_pbuffer;
649     }
650     }
651    
652     return NULL; /* Control never gets here */
653     }
654    
655    
656    
657    
658    
659    
660    
661     /*************************************************
662 nigel 63 * Read number from string *
663     *************************************************/
664    
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple: leading white space is
skipped, then a run of decimal digits is accumulated. No sign is recognized
and no overflow check is made.

Arguments:
  str        string to be converted
  endptr     where to put the end pointer (the first character that was
               not consumed)

Returns:     the value as an int (0 if there were no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *p = str;
int value = 0;

for (; *p != 0 && isspace(*p); p++) continue;
for (; isdigit(*p); p++) value = value * 10 + (int)(*p - '0');

*endptr = p;
return value;
}
685    
686    
687    
688 nigel 49
689     /*************************************************
690     * Convert UTF-8 string to value *
691     *************************************************/
692    
693     /* This function takes one or more bytes that represents a UTF-8 character,
694     and returns the value of the character.
695    
696     Argument:
697 nigel 91 utf8bytes a pointer to the byte vector
698     vptr a pointer to an int to receive the value
699 nigel 49
700 nigel 91 Returns: > 0 => the number of bytes consumed
701     -6 to 0 => malformed UTF-8 character at offset = (-return)
702 nigel 49 */
703    
704 nigel 79 #if !defined NOUTF8
705    
706 nigel 67 static int
707 nigel 91 utf82ord(unsigned char *utf8bytes, int *vptr)
708 nigel 49 {
709 nigel 91 int c = *utf8bytes++;
710 nigel 49 int d = c;
711     int i, j, s;
712    
713     for (i = -1; i < 6; i++) /* i is number of additional bytes */
714     {
715     if ((d & 0x80) == 0) break;
716     d <<= 1;
717     }
718    
719     if (i == -1) { *vptr = c; return 1; } /* ascii character */
720     if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
721    
722     /* i now has a value in the range 1-5 */
723    
724 nigel 59 s = 6*i;
725 nigel 85 d = (c & utf8_table3[i]) << s;
726 nigel 49
727     for (j = 0; j < i; j++)
728     {
729 nigel 91 c = *utf8bytes++;
730 nigel 49 if ((c & 0xc0) != 0x80) return -(j+1);
731 nigel 59 s -= 6;
732 nigel 49 d |= (c & 0x3f) << s;
733     }
734    
735     /* Check that encoding was the correct unique one */
736    
737 nigel 85 for (j = 0; j < utf8_table1_size; j++)
738     if (d <= utf8_table1[j]) break;
739 nigel 49 if (j != i) return -(i+1);
740    
741     /* Valid value */
742    
743     *vptr = d;
744     return i+1;
745     }
746    
747 nigel 79 #endif
748 nigel 49
749    
750 nigel 79
751 nigel 63 /*************************************************
752 nigel 85 * Convert character value to UTF-8 *
753     *************************************************/
754    
755     /* This function takes an integer value in the range 0 - 0x7fffffff
756     and encodes it as a UTF-8 character in 0 to 6 bytes.
757    
758     Arguments:
759     cvalue the character value
760 nigel 91 utf8bytes pointer to buffer for result - at least 6 bytes long
761 nigel 85
762     Returns: number of characters placed in the buffer
763     */
764    
765 nigel 93 #if !defined NOUTF8
766    
767 nigel 85 static int
768 nigel 91 ord2utf8(int cvalue, uschar *utf8bytes)
769 nigel 85 {
770     register int i, j;
771     for (i = 0; i < utf8_table1_size; i++)
772     if (cvalue <= utf8_table1[i]) break;
773 nigel 91 utf8bytes += i;
774 nigel 85 for (j = i; j > 0; j--)
775     {
776 nigel 91 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
777 nigel 85 cvalue >>= 6;
778     }
779 nigel 91 *utf8bytes = utf8_table2[i] | cvalue;
780 nigel 85 return i + 1;
781     }
782    
783 nigel 93 #endif
784 nigel 85
785    
786 nigel 93
787 nigel 85 /*************************************************
788 nigel 63 * Print character string *
789     *************************************************/
790 nigel 49
791 nigel 63 /* Character string printing function. Must handle UTF-8 strings in utf8
792     mode. Yields number of characters printed. If handed a NULL file, just counts
793     chars without printing. */
794 nigel 49
795 nigel 63 static int pchars(unsigned char *p, int length, FILE *f)
796 nigel 3 {
797 nigel 85 int c = 0;
798 nigel 63 int yield = 0;
799 nigel 3
800 nigel 63 while (length-- > 0)
801 nigel 3 {
802 nigel 79 #if !defined NOUTF8
803 nigel 67 if (use_utf8)
804 nigel 63 {
805     int rc = utf82ord(p, &c);
806 nigel 3
807 nigel 63 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
808     {
809     length -= rc - 1;
810     p += rc;
811 nigel 93 if (PRINTHEX(c))
812 nigel 63 {
813     if (f != NULL) fprintf(f, "%c", c);
814     yield++;
815     }
816     else
817     {
818 nigel 93 int n = 4;
819     if (f != NULL) fprintf(f, "\\x{%02x}", c);
820     yield += (n <= 0x000000ff)? 2 :
821     (n <= 0x00000fff)? 3 :
822     (n <= 0x0000ffff)? 4 :
823     (n <= 0x000fffff)? 5 : 6;
824 nigel 63 }
825     continue;
826     }
827     }
828 nigel 79 #endif
829 nigel 3
830 nigel 63 /* Not UTF-8, or malformed UTF-8 */
831    
832 nigel 93 c = *p++;
833     if (PRINTHEX(c))
834 nigel 3 {
835 nigel 63 if (f != NULL) fprintf(f, "%c", c);
836     yield++;
837 nigel 3 }
838 nigel 63 else
839 nigel 3 {
840 nigel 63 if (f != NULL) fprintf(f, "\\x%02x", c);
841     yield += 4;
842     }
843     }
844 nigel 3
845 nigel 63 return yield;
846     }
847 nigel 23
848 nigel 3
849 nigel 23
850 nigel 63 /*************************************************
851     * Callout function *
852     *************************************************/
853 nigel 3
854 nigel 63 /* Called from PCRE as a result of the (?C) item. We print out where we are in
855     the match. Yield zero unless more callouts than the fail count, or the callout
856     data is not zero. */
857 nigel 3
858 nigel 63 static int callout(pcre_callout_block *cb)
859     {
860     FILE *f = (first_callout | callout_extra)? outfile : NULL;
861 nigel 75 int i, pre_start, post_start, subject_length;
862 nigel 3
863 nigel 63 if (callout_extra)
864     {
865     fprintf(f, "Callout %d: last capture = %d\n",
866     cb->callout_number, cb->capture_last);
867 nigel 3
868 nigel 63 for (i = 0; i < cb->capture_top * 2; i += 2)
869     {
870     if (cb->offset_vector[i] < 0)
871     fprintf(f, "%2d: <unset>\n", i/2);
872     else
873     {
874     fprintf(f, "%2d: ", i/2);
875     (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
876     cb->offset_vector[i+1] - cb->offset_vector[i], f);
877     fprintf(f, "\n");
878     }
879     }
880     }
881 nigel 3
882 nigel 63 /* Re-print the subject in canonical form, the first time or if giving full
883     datails. On subsequent calls in the same match, we use pchars just to find the
884     printed lengths of the substrings. */
885 nigel 3
886 nigel 63 if (f != NULL) fprintf(f, "--->");
887 nigel 3
888 nigel 63 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
889     post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
890     cb->current_position - cb->start_match, f);
891 nigel 3
892 nigel 75 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
893    
894 nigel 63 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
895     cb->subject_length - cb->current_position, f);
896 nigel 3
897 nigel 63 if (f != NULL) fprintf(f, "\n");
898 nigel 9
899 nigel 63 /* Always print appropriate indicators, with callout number if not already
900 nigel 75 shown. For automatic callouts, show the pattern offset. */
901 nigel 3
902 nigel 75 if (cb->callout_number == 255)
903     {
904     fprintf(outfile, "%+3d ", cb->pattern_position);
905     if (cb->pattern_position > 99) fprintf(outfile, "\n ");
906     }
907     else
908     {
909     if (callout_extra) fprintf(outfile, " ");
910     else fprintf(outfile, "%3d ", cb->callout_number);
911     }
912 nigel 3
913 nigel 63 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
914     fprintf(outfile, "^");
915 nigel 3
916 nigel 63 if (post_start > 0)
917     {
918     for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
919     fprintf(outfile, "^");
920 nigel 3 }
921    
922 nigel 75 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
923     fprintf(outfile, " ");
924    
925     fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
926     pbuffer + cb->pattern_position);
927    
928 nigel 63 fprintf(outfile, "\n");
929     first_callout = 0;
930 nigel 3
931 nigel 71 if (cb->callout_data != NULL)
932 nigel 49 {
933 nigel 71 int callout_data = *((int *)(cb->callout_data));
934     if (callout_data != 0)
935     {
936     fprintf(outfile, "Callout data = %d\n", callout_data);
937     return callout_data;
938     }
939 nigel 63 }
940 nigel 49
941 nigel 63 return (cb->callout_number != callout_fail_id)? 0 :
942     (++callout_count >= callout_fail_count)? 1 : 0;
943 nigel 3 }
944    
945    
946 nigel 63 /*************************************************
947 nigel 73 * Local malloc functions *
948 nigel 63 *************************************************/
949 nigel 3
950     /* Alternative malloc function, to test functionality and show the size of the
951     compiled re. */
952    
953     static void *new_malloc(size_t size)
954     {
955 nigel 73 void *block = malloc(size);
956 nigel 43 gotten_store = size;
957 nigel 73 if (show_malloc)
958 nigel 77 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
959 nigel 73 return block;
960 nigel 3 }
961    
962 nigel 73 static void new_free(void *block)
963     {
964     if (show_malloc)
965     fprintf(outfile, "free %p\n", block);
966     free(block);
967     }
968 nigel 3
969    
970 nigel 73 /* For recursion malloc/free, to test stacking calls */
971    
972     static void *stack_malloc(size_t size)
973     {
974     void *block = malloc(size);
975     if (show_malloc)
976 nigel 77 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
977 nigel 73 return block;
978     }
979    
980     static void stack_free(void *block)
981     {
982     if (show_malloc)
983     fprintf(outfile, "stack_free %p\n", block);
984     free(block);
985     }
986    
987    
988 nigel 63 /*************************************************
989     * Call pcre_fullinfo() *
990     *************************************************/
991 nigel 43
992     /* Get one piece of information from the pcre_fullinfo() function */
993    
994     static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
995     {
996     int rc;
997     if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
998     fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
999     }
1000    
1001    
1002    
1003 nigel 63 /*************************************************
1004 nigel 75 * Byte flipping function *
1005     *************************************************/
1006    
/* Reverse the byte order of a value. When n is 2 the low two bytes are
swapped; for any other n the low four bytes are reversed. Bits above those
bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
  unsigned long int byte0 = value & 0x000000ff;
  unsigned long int byte1 = value & 0x0000ff00;
  unsigned long int byte2;
  unsigned long int byte3;

  if (n == 2)
  {
    return (byte0 << 8) | (byte1 >> 8);
  }

  byte2 = value & 0x00ff0000;
  byte3 = value & 0xff000000;

  return (byte0 << 24) | (byte1 << 8) | (byte2 >> 8) | (byte3 >> 24);
}
1016    
1017    
1018    
1019    
1020     /*************************************************
1021 nigel 87 * Check match or recursion limit *
1022     *************************************************/
1023    
1024     static int
1025     check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1026     int start_offset, int options, int *use_offsets, int use_size_offsets,
1027     int flag, unsigned long int *limit, int errnumber, const char *msg)
1028     {
1029     int count;
1030     int min = 0;
1031     int mid = 64;
1032     int max = -1;
1033    
1034     extra->flags |= flag;
1035    
1036     for (;;)
1037     {
1038     *limit = mid;
1039    
1040     count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1041     use_offsets, use_size_offsets);
1042    
1043     if (count == errnumber)
1044     {
1045     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1046     min = mid;
1047     mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1048     }
1049    
1050     else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1051     count == PCRE_ERROR_PARTIAL)
1052     {
1053     if (mid == min + 1)
1054     {
1055     fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1056     break;
1057     }
1058     /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1059     max = mid;
1060     mid = (min + mid)/2;
1061     }
1062     else break; /* Some other error */
1063     }
1064    
1065     extra->flags &= ~flag;
1066     return count;
1067     }
1068    
1069    
1070    
1071     /*************************************************
1072 ph10 227 * Case-independent strncmp() function *
1073     *************************************************/
1074    
1075     /*
1076     Arguments:
1077     s first string
1078     t second string
1079     n number of characters to compare
1080    
1081     Returns: < 0, = 0, or > 0, according to the comparison
1082     */
1083    
1084     static int
1085     strncmpic(uschar *s, uschar *t, int n)
1086     {
1087     while (n--)
1088     {
1089     int c = tolower(*s++) - tolower(*t++);
1090     if (c) return c;
1091     }
1092     return 0;
1093     }
1094    
1095    
1096    
1097     /*************************************************
1098 nigel 91 * Check newline indicator *
1099     *************************************************/
1100    
1101 ph10 518 /* This is used both at compile and run-time to check for <xxx> escapes. Print
1102     a message and return 0 if there is no match.
1103 nigel 91
1104     Arguments:
1105     p points after the leading '<'
1106     f file for error message
1107    
1108     Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
1109     */
1110    
1111     static int
1112     check_newline(uschar *p, FILE *f)
1113     {
1114 ph10 227 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1115     if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1116     if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1117     if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1118     if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1119 ph10 231 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1120     if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1121 nigel 91 fprintf(f, "Unknown newline type at: <%s\n", p);
1122     return 0;
1123     }
1124    
1125    
1126    
1127     /*************************************************
1128 nigel 93 * Usage function *
1129     *************************************************/
1130    
/* Write the command-line help text to stdout. Which lines appear depends on
SUPPORT_LIBREADLINE, NODFA and NOPOSIX. */

static void
usage(void)
{
  fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n"
        "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
  fputs("If input is a terminal, readline() is used to read from it.\n",
        stdout);
#else
  fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
  fputs("\nOptions:\n"
        " -b show compiled code (bytecode)\n"
        " -C show PCRE compile-time options and exit\n"
        " -d debug: show compiled code and information (-b and -i)\n",
        stdout);
#if !defined NODFA
  fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
  fputs(" -help show usage information\n"
        " -i show information about compiled patterns\n"
        " -M find MATCH_LIMIT minimum for each subject\n"
        " -m output memory used information\n"
        " -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
  fputs(" -p use POSIX interface\n", stdout);
#endif
  fputs(" -q quiet: do not output PCRE version number at start\n"
        " -S <n> set stack size to <n> megabytes\n"
        " -s output store (memory) used information\n"
        " -t time compilation and execution\n"
        " -t <n> time compilation and execution, repeating <n> times\n"
        " -tm time execution (matching) only\n"
        " -tm <n> time execution (matching) only, repeating <n> times\n",
        stdout);
}
1164    
1165    
1166    
1167     /*************************************************
1168 nigel 63 * Main Program *
1169     *************************************************/
1170 nigel 43
1171 nigel 3 /* Read lines from named file or stdin and write to named file or stdout; lines
1172     consist of a regular expression, in delimiters and optionally followed by
1173     options, followed by a set of test data, terminated by an empty line. */
1174    
1175     int main(int argc, char **argv)
1176     {
1177     FILE *infile = stdin;
1178     int options = 0;
1179     int study_options = 0;
1180 ph10 386 int default_find_match_limit = FALSE;
1181 nigel 3 int op = 1;
1182     int timeit = 0;
1183 nigel 93 int timeitm = 0;
1184 nigel 3 int showinfo = 0;
1185 nigel 31 int showstore = 0;
1186 nigel 87 int quiet = 0;
1187 nigel 53 int size_offsets = 45;
1188     int size_offsets_max;
1189 nigel 77 int *offsets = NULL;
1190 nigel 53 #if !defined NOPOSIX
1191 nigel 3 int posix = 0;
1192 nigel 53 #endif
1193 nigel 3 int debug = 0;
1194 nigel 11 int done = 0;
1195 nigel 77 int all_use_dfa = 0;
1196     int yield = 0;
1197 nigel 91 int stack_size;
1198 nigel 3
1199 nigel 91 /* These vectors store, end-to-end, a list of captured substring names. Assume
1200     that 1024 is plenty long enough for the few names we'll be testing. */
1201 nigel 69
1202 nigel 91 uschar copynames[1024];
1203     uschar getnames[1024];
1204    
1205     uschar *copynamesptr;
1206     uschar *getnamesptr;
1207    
1208 nigel 69 /* Get buffers from malloc() so that Electric Fence will check their misuse
1209 nigel 91 when I am debugging. They grow automatically when very long lines are read. */
1210 nigel 69
1211 nigel 91 buffer = (unsigned char *)malloc(buffer_size);
1212     dbuffer = (unsigned char *)malloc(buffer_size);
1213     pbuffer = (unsigned char *)malloc(buffer_size);
1214 nigel 69
1215 nigel 93 /* The outfile variable is static so that new_malloc can use it. */
1216 nigel 3
1217 nigel 93 outfile = stdout;
1218    
1219     /* The following _setmode() stuff is some Windows magic that tells its runtime
1220     library to translate CRLF into a single LF character. At least, that's what
1221     I've been told: never having used Windows I take this all on trust. Originally
1222     it set 0x8000, but then I was advised that _O_BINARY was better. */
1223    
1224 nigel 75 #if defined(_WIN32) || defined(WIN32)
1225 nigel 93 _setmode( _fileno( stdout ), _O_BINARY );
1226     #endif
1227 nigel 75
1228 nigel 3 /* Scan options */
1229    
1230     while (argc > 1 && argv[op][0] == '-')
1231     {
1232 nigel 63 unsigned char *endptr;
1233 nigel 53
1234 nigel 31 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1235     showstore = 1;
1236 nigel 87 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1237 nigel 93 else if (strcmp(argv[op], "-b") == 0) debug = 1;
1238 nigel 3 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1239     else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1240 ph10 392 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1241 nigel 79 #if !defined NODFA
1242 nigel 77 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1243 nigel 79 #endif
1244 nigel 53 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1245 nigel 65 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1246     *endptr == 0))
1247 nigel 53 {
1248     op++;
1249     argc--;
1250     }
1251 nigel 93 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1252     {
1253     int both = argv[op][2] == 0;
1254     int temp;
1255     if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1256     *endptr == 0))
1257     {
1258     timeitm = temp;
1259     op++;
1260     argc--;
1261     }
1262     else timeitm = LOOPREPEAT;
1263     if (both) timeit = timeitm;
1264     }
1265 nigel 91 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1266     ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1267     *endptr == 0))
1268     {
1269 nigel 93 #if defined(_WIN32) || defined(WIN32)
1270 nigel 91 printf("PCRE: -S not supported on this OS\n");
1271     exit(1);
1272     #else
1273     int rc;
1274     struct rlimit rlim;
1275     getrlimit(RLIMIT_STACK, &rlim);
1276     rlim.rlim_cur = stack_size * 1024 * 1024;
1277     rc = setrlimit(RLIMIT_STACK, &rlim);
1278     if (rc != 0)
1279     {
1280     printf("PCRE: setrlimit() failed with error %d\n", rc);
1281     exit(1);
1282     }
1283     op++;
1284     argc--;
1285     #endif
1286     }
1287 nigel 53 #if !defined NOPOSIX
1288 nigel 3 else if (strcmp(argv[op], "-p") == 0) posix = 1;
1289 nigel 53 #endif
1290 nigel 63 else if (strcmp(argv[op], "-C") == 0)
1291     {
1292     int rc;
1293 ph10 392 unsigned long int lrc;
1294 nigel 63 printf("PCRE version %s\n", pcre_version());
1295     printf("Compiled with\n");
1296     (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1297     printf(" %sUTF-8 support\n", rc? "" : "No ");
1298 nigel 75 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1299     printf(" %sUnicode properties support\n", rc? "" : "No ");
1300 nigel 63 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1301 ph10 391 /* Note that these values are always the ASCII values, even
1302 ph10 392 in EBCDIC environments. CR is 13 and NL is 10. */
1303 ph10 391 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
1304     (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1305 ph10 150 (rc == -2)? "ANYCRLF" :
1306 nigel 93 (rc == -1)? "ANY" : "???");
1307 ph10 231 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1308     printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1309     "all Unicode newlines");
1310 nigel 63 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1311     printf(" Internal link size = %d\n", rc);
1312     (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1313     printf(" POSIX malloc threshold = %d\n", rc);
1314 ph10 376 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1315     printf(" Default match limit = %ld\n", lrc);
1316     (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1317     printf(" Default recursion depth limit = %ld\n", lrc);
1318 nigel 73 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1319     printf(" Match recursion uses %s\n", rc? "stack" : "heap");
1320 ph10 121 goto EXIT;
1321 nigel 63 }
1322 nigel 93 else if (strcmp(argv[op], "-help") == 0 ||
1323     strcmp(argv[op], "--help") == 0)
1324     {
1325     usage();
1326     goto EXIT;
1327     }
1328 nigel 3 else
1329     {
1330 nigel 53 printf("** Unknown or malformed option %s\n", argv[op]);
1331 nigel 93 usage();
1332 nigel 77 yield = 1;
1333     goto EXIT;
1334 nigel 3 }
1335     op++;
1336     argc--;
1337     }
1338    
1339 nigel 53 /* Get the store for the offsets vector, and remember what it was */
1340    
1341     size_offsets_max = size_offsets;
1342 nigel 71 offsets = (int *)malloc(size_offsets_max * sizeof(int));
1343 nigel 53 if (offsets == NULL)
1344     {
1345     printf("** Failed to get %d bytes of memory for offsets vector\n",
1346 ph10 151 (int)(size_offsets_max * sizeof(int)));
1347 nigel 77 yield = 1;
1348     goto EXIT;
1349 nigel 53 }
1350    
1351 nigel 3 /* Sort out the input and output files */
1352    
1353     if (argc > 1)
1354     {
1355 nigel 93 infile = fopen(argv[op], INPUT_MODE);
1356 nigel 3 if (infile == NULL)
1357     {
1358     printf("** Failed to open %s\n", argv[op]);
1359 nigel 77 yield = 1;
1360     goto EXIT;
1361 nigel 3 }
1362     }
1363    
1364     if (argc > 2)
1365     {
1366 nigel 93 outfile = fopen(argv[op+1], OUTPUT_MODE);
1367 nigel 3 if (outfile == NULL)
1368     {
1369     printf("** Failed to open %s\n", argv[op+1]);
1370 nigel 77 yield = 1;
1371     goto EXIT;
1372 nigel 3 }
1373     }
1374    
1375     /* Set alternative malloc function */
1376    
1377     pcre_malloc = new_malloc;
1378 nigel 73 pcre_free = new_free;
1379     pcre_stack_malloc = stack_malloc;
1380     pcre_stack_free = stack_free;
1381 nigel 3
1382 nigel 87 /* Heading line unless quiet, then prompt for first regex if stdin */
1383 nigel 3
1384 nigel 87 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1385 nigel 3
1386     /* Main loop */
1387    
1388 nigel 11 while (!done)
1389 nigel 3 {
1390     pcre *re = NULL;
1391     pcre_extra *extra = NULL;
1392 nigel 37
1393     #if !defined NOPOSIX /* There are still compilers that require no indent */
1394 nigel 3 regex_t preg;
1395 nigel 45 int do_posix = 0;
1396 nigel 37 #endif
1397    
1398 nigel 7 const char *error;
1399 ph10 512 unsigned char *markptr;
1400 nigel 25 unsigned char *p, *pp, *ppp;
1401 nigel 75 unsigned char *to_file = NULL;
1402 nigel 53 const unsigned char *tables = NULL;
1403 nigel 75 unsigned long int true_size, true_study_size = 0;
1404     size_t size, regex_gotten_store;
1405 ph10 512 int do_mark = 0;
1406 nigel 3 int do_study = 0;
1407 nigel 25 int do_debug = debug;
1408 nigel 35 int do_G = 0;
1409     int do_g = 0;
1410 nigel 25 int do_showinfo = showinfo;
1411 nigel 35 int do_showrest = 0;
1412 nigel 75 int do_flip = 0;
1413 nigel 93 int erroroffset, len, delimiter, poffset;
1414 nigel 3
1415 nigel 67 use_utf8 = 0;
1416 ph10 211 debug_lengths = 1;
1417 nigel 63
1418 ph10 287 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1419 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1420 nigel 63 fflush(outfile);
1421 nigel 3
1422     p = buffer;
1423     while (isspace(*p)) p++;
1424     if (*p == 0) continue;
1425    
1426 nigel 75 /* See if the pattern is to be loaded pre-compiled from a file. */
1427 nigel 3
1428 nigel 75 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1429     {
1430 nigel 91 unsigned long int magic, get_options;
1431 nigel 75 uschar sbuf[8];
1432     FILE *f;
1433    
1434     p++;
1435     pp = p + (int)strlen((char *)p);
1436     while (isspace(pp[-1])) pp--;
1437     *pp = 0;
1438    
1439     f = fopen((char *)p, "rb");
1440     if (f == NULL)
1441     {
1442     fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1443     continue;
1444     }
1445    
1446     if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1447    
1448     true_size =
1449     (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1450     true_study_size =
1451     (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1452    
1453     re = (real_pcre *)new_malloc(true_size);
1454     regex_gotten_store = gotten_store;
1455    
1456     if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1457    
1458     magic = ((real_pcre *)re)->magic_number;
1459     if (magic != MAGIC_NUMBER)
1460     {
1461     if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1462     {
1463     do_flip = 1;
1464     }
1465     else
1466     {
1467     fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1468     fclose(f);
1469     continue;
1470     }
1471     }
1472    
1473     fprintf(outfile, "Compiled regex%s loaded from %s\n",
1474     do_flip? " (byte-inverted)" : "", p);
1475    
1476     /* Need to know if UTF-8 for printing data strings */
1477    
1478 nigel 91 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1479     use_utf8 = (get_options & PCRE_UTF8) != 0;
1480 nigel 75
1481     /* Now see if there is any following study data */
1482    
1483     if (true_study_size != 0)
1484     {
1485     pcre_study_data *psd;
1486    
1487     extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1488     extra->flags = PCRE_EXTRA_STUDY_DATA;
1489    
1490     psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1491     extra->study_data = psd;
1492    
1493     if (fread(psd, 1, true_study_size, f) != true_study_size)
1494     {
1495     FAIL_READ:
1496     fprintf(outfile, "Failed to read data from %s\n", p);
1497     if (extra != NULL) new_free(extra);
1498     if (re != NULL) new_free(re);
1499     fclose(f);
1500     continue;
1501     }
1502     fprintf(outfile, "Study data loaded from %s\n", p);
1503     do_study = 1; /* To get the data output if requested */
1504     }
1505     else fprintf(outfile, "No study data\n");
1506    
1507     fclose(f);
1508     goto SHOW_INFO;
1509     }
1510    
1511     /* In-line pattern (the usual case). Get the delimiter and seek the end of
1512     the pattern; if is isn't complete, read more. */
1513    
1514 nigel 3 delimiter = *p++;
1515    
1516 nigel 29 if (isalnum(delimiter) || delimiter == '\\')
1517 nigel 3 {
1518 ph10 274 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1519 nigel 3 goto SKIP_DATA;
1520     }
1521    
1522     pp = p;
1523 ph10 530 poffset = (int)(p - buffer);
1524 nigel 3
1525     for(;;)
1526     {
1527 nigel 29 while (*pp != 0)
1528     {
1529     if (*pp == '\\' && pp[1] != 0) pp++;
1530     else if (*pp == delimiter) break;
1531     pp++;
1532     }
1533 nigel 3 if (*pp != 0) break;
1534 ph10 287 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1535 nigel 3 {
1536     fprintf(outfile, "** Unexpected EOF\n");
1537 nigel 11 done = 1;
1538     goto CONTINUE;
1539 nigel 3 }
1540 nigel 23 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1541 nigel 3 }
1542    
1543 nigel 93 /* The buffer may have moved while being extended; reset the start of data
1544     pointer to the correct relative point in the buffer. */
1545    
1546     p = buffer + poffset;
1547    
1548 nigel 29 /* If the first character after the delimiter is backslash, make
1549     the pattern end with backslash. This is purely to provide a way
1550     of testing for the error message when a pattern ends with backslash. */
1551    
1552     if (pp[1] == '\\') *pp++ = '\\';
1553    
1554 nigel 75 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1555     for callouts. */
1556 nigel 3
1557     *pp++ = 0;
1558 nigel 75 strcpy((char *)pbuffer, (char *)p);
1559 nigel 3
1560     /* Look for options after final delimiter */
1561    
1562     options = 0;
1563     study_options = 0;
1564 nigel 31 log_store = showstore; /* default from command line */
1565    
1566 nigel 3 while (*pp != 0)
1567     {
1568     switch (*pp++)
1569     {
1570 nigel 77 case 'f': options |= PCRE_FIRSTLINE; break;
1571 nigel 35 case 'g': do_g = 1; break;
1572 nigel 3 case 'i': options |= PCRE_CASELESS; break;
1573     case 'm': options |= PCRE_MULTILINE; break;
1574     case 's': options |= PCRE_DOTALL; break;
1575     case 'x': options |= PCRE_EXTENDED; break;
1576 nigel 25
1577 nigel 35 case '+': do_showrest = 1; break;
1578 nigel 3 case 'A': options |= PCRE_ANCHORED; break;
1579 nigel 93 case 'B': do_debug = 1; break;
1580 nigel 75 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1581 nigel 25 case 'D': do_debug = do_showinfo = 1; break;
1582 nigel 3 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1583 nigel 75 case 'F': do_flip = 1; break;
1584 nigel 35 case 'G': do_G = 1; break;
1585 nigel 25 case 'I': do_showinfo = 1; break;
1586 nigel 91 case 'J': options |= PCRE_DUPNAMES; break;
1587 ph10 512 case 'K': do_mark = 1; break;
1588 nigel 31 case 'M': log_store = 1; break;
1589 nigel 63 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1590 nigel 37
1591     #if !defined NOPOSIX
1592 nigel 3 case 'P': do_posix = 1; break;
1593 nigel 37 #endif
1594    
1595 nigel 3 case 'S': do_study = 1; break;
1596 nigel 19 case 'U': options |= PCRE_UNGREEDY; break;
1597 ph10 535 case 'W': options |= PCRE_UCP; break;
1598 nigel 3 case 'X': options |= PCRE_EXTRA; break;
1599 ph10 576 case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
1600 ph10 126 case 'Z': debug_lengths = 0; break;
1601 nigel 67 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1602 nigel 71 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1603 ph10 545
1604 ph10 541 case 'T':
1605     switch (*pp++)
1606     {
1607     case '0': tables = tables0; break;
1608     case '1': tables = tables1; break;
1609 ph10 545
1610 ph10 541 case '\r':
1611     case '\n':
1612 ph10 545 case ' ':
1613     case 0:
1614 ph10 541 fprintf(outfile, "** Missing table number after /T\n");
1615 ph10 545 goto SKIP_DATA;
1616    
1617     default:
1618 ph10 541 fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1619 ph10 545 goto SKIP_DATA;
1620 ph10 541 }
1621 ph10 545 break;
1622 nigel 25
1623     case 'L':
1624     ppp = pp;
1625 nigel 93 /* The '\r' test here is so that it works on Windows. */
1626     /* The '0' test is just in case this is an unterminated line. */
1627     while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1628 nigel 25 *ppp = 0;
1629     if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1630     {
1631     fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1632     goto SKIP_DATA;
1633     }
1634 nigel 93 locale_set = 1;
1635 nigel 25 tables = pcre_maketables();
1636     pp = ppp;
1637     break;
1638    
1639 nigel 75 case '>':
1640     to_file = pp;
1641     while (*pp != 0) pp++;
1642     while (isspace(pp[-1])) pp--;
1643     *pp = 0;
1644     break;
1645    
1646 nigel 91 case '<':
1647     {
1648 ph10 518 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1649 ph10 336 {
1650     options |= PCRE_JAVASCRIPT_COMPAT;
1651 ph10 345 pp += 3;
1652 ph10 336 }
1653     else
1654 ph10 345 {
1655 ph10 336 int x = check_newline(pp, outfile);
1656     if (x == 0) goto SKIP_DATA;
1657     options |= x;
1658     while (*pp++ != '>');
1659 ph10 345 }
1660 nigel 91 }
1661     break;
1662    
1663 nigel 77 case '\r': /* So that it works in Windows */
1664     case '\n':
1665     case ' ':
1666     break;
1667 nigel 75
1668 nigel 3 default:
1669     fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1670     goto SKIP_DATA;
1671     }
1672     }
1673    
1674 nigel 11 /* Handle compiling via the POSIX interface, which doesn't support the
1675 nigel 25 timing, showing, or debugging options, nor the ability to pass over
1676     local character tables. */
1677 nigel 3
1678 nigel 37 #if !defined NOPOSIX
1679 nigel 3 if (posix || do_posix)
1680     {
1681     int rc;
1682     int cflags = 0;
1683 nigel 75
1684 nigel 3 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1685     if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1686 nigel 77 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1687 nigel 87 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1688     if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1689 ph10 518 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1690 ph10 461 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1691 nigel 87
1692 nigel 3 rc = regcomp(&preg, (char *)p, cflags);
1693    
1694     /* Compilation failed; go back for another re, skipping to blank line
1695     if non-interactive. */
1696    
1697     if (rc != 0)
1698     {
1699 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1700 nigel 3 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1701     goto SKIP_DATA;
1702     }
1703     }
1704    
1705     /* Handle compiling via the native interface */
1706    
1707     else
1708 nigel 37 #endif /* !defined NOPOSIX */
1709    
1710 nigel 3 {
1711 ph10 412 unsigned long int get_options;
1712 ph10 416
1713 nigel 93 if (timeit > 0)
1714 nigel 3 {
1715     register int i;
1716     clock_t time_taken;
1717     clock_t start_time = clock();
1718 nigel 93 for (i = 0; i < timeit; i++)
1719 nigel 3 {
1720 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1721 nigel 3 if (re != NULL) free(re);
1722     }
1723     time_taken = clock() - start_time;
1724 nigel 93 fprintf(outfile, "Compile time %.4f milliseconds\n",
1725     (((double)time_taken * 1000.0) / (double)timeit) /
1726 nigel 63 (double)CLOCKS_PER_SEC);
1727 nigel 3 }
1728    
1729 nigel 25 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1730 nigel 3
1731     /* Compilation failed; go back for another re, skipping to blank line
1732     if non-interactive. */
1733    
1734     if (re == NULL)
1735     {
1736     fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1737     SKIP_DATA:
1738     if (infile != stdin)
1739     {
1740     for (;;)
1741     {
1742 ph10 287 if (extend_inputline(infile, buffer, NULL) == NULL)
1743 nigel 11 {
1744     done = 1;
1745     goto CONTINUE;
1746     }
1747 nigel 3 len = (int)strlen((char *)buffer);
1748     while (len > 0 && isspace(buffer[len-1])) len--;
1749     if (len == 0) break;
1750     }
1751     fprintf(outfile, "\n");
1752     }
1753 nigel 25 goto CONTINUE;
1754 nigel 3 }
1755 ph10 416
1756     /* Compilation succeeded. It is now possible to set the UTF-8 option from
1757     within the regex; check for this so that we know how to process the data
1758 ph10 412 lines. */
1759 ph10 416
1760 ph10 412 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1761     if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1762 nigel 3
1763 ph10 412 /* Print information if required. There are now two info-returning
1764     functions. The old one has a limited interface and returns only limited
1765     data. Check that it agrees with the newer one. */
1766 nigel 3
1767 nigel 63 if (log_store)
1768     fprintf(outfile, "Memory allocation (code space): %d\n",
1769     (int)(gotten_store -
1770     sizeof(real_pcre) -
1771     ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1772    
1773 nigel 75 /* Extract the size for possible writing before possibly flipping it,
1774     and remember the store that was got. */
1775    
1776     true_size = ((real_pcre *)re)->size;
1777     regex_gotten_store = gotten_store;
1778    
1779     /* If /S was present, study the regexp to generate additional info to
1780     help with the matching. */
1781    
1782     if (do_study)
1783     {
1784 nigel 93 if (timeit > 0)
1785 nigel 75 {
1786     register int i;
1787     clock_t time_taken;
1788     clock_t start_time = clock();
1789 nigel 93 for (i = 0; i < timeit; i++)
1790 nigel 75 extra = pcre_study(re, study_options, &error);
1791     time_taken = clock() - start_time;
1792     if (extra != NULL) free(extra);
1793 nigel 93 fprintf(outfile, " Study time %.4f milliseconds\n",
1794     (((double)time_taken * 1000.0) / (double)timeit) /
1795 nigel 75 (double)CLOCKS_PER_SEC);
1796     }
1797     extra = pcre_study(re, study_options, &error);
1798     if (error != NULL)
1799     fprintf(outfile, "Failed to study: %s\n", error);
1800     else if (extra != NULL)
1801     true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1802     }
1803 ph10 512
1804 ph10 510 /* If /K was present, we set up for handling MARK data. */
1805 ph10 512
1806 ph10 510 if (do_mark)
1807     {
1808     if (extra == NULL)
1809     {
1810     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1811     extra->flags = 0;
1812     }
1813 ph10 512 extra->mark = &markptr;
1814 ph10 510 extra->flags |= PCRE_EXTRA_MARK;
1815 ph10 512 }
1816 nigel 75
1817     /* If the 'F' option was present, we flip the bytes of all the integer
1818     fields in the regex data block and the study block. This is to make it
1819     possible to test PCRE's handling of byte-flipped patterns, e.g. those
1820     compiled on a different architecture. */
1821    
1822     if (do_flip)
1823     {
1824     real_pcre *rre = (real_pcre *)re;
1825 ph10 259 rre->magic_number =
1826 ph10 255 byteflip(rre->magic_number, sizeof(rre->magic_number));
1827 nigel 75 rre->size = byteflip(rre->size, sizeof(rre->size));
1828     rre->options = byteflip(rre->options, sizeof(rre->options));
1829 ph10 255 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1830 ph10 259 rre->top_bracket =
1831 ph10 255 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1832 ph10 259 rre->top_backref =
1833 ph10 255 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1834 ph10 259 rre->first_byte =
1835 ph10 255 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1836 ph10 259 rre->req_byte =
1837 ph10 255 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1838     rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1839 nigel 75 sizeof(rre->name_table_offset));
1840 ph10 255 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1841 nigel 75 sizeof(rre->name_entry_size));
1842 ph10 259 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1843 ph10 255 sizeof(rre->name_count));
1844 nigel 75
1845     if (extra != NULL)
1846     {
1847     pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1848     rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1849 ph10 455 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1850     rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1851 nigel 75 }
1852     }
1853    
1854     /* Extract information from the compiled data if required */
1855    
1856     SHOW_INFO:
1857    
1858 nigel 93 if (do_debug)
1859     {
1860     fprintf(outfile, "------------------------------------------------------------------\n");
1861 ph10 116 pcre_printint(re, outfile, debug_lengths);
1862 nigel 93 }
1863 ph10 416
1864 ph10 412 /* We already have the options in get_options (see above) */
1865 nigel 93
1866 nigel 25 if (do_showinfo)
1867 nigel 3 {
1868 ph10 412 unsigned long int all_options;
1869 nigel 79 #if !defined NOINFOCHECK
1870 nigel 43 int old_first_char, old_options, old_count;
1871 nigel 79 #endif
1872 ph10 226 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1873 ph10 227 hascrorlf;
1874 nigel 63 int nameentrysize, namecount;
1875     const uschar *nametable;
1876 nigel 3
1877 nigel 43 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1878     new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1879     new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1880 nigel 63 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1881 nigel 43 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1882 nigel 63 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1883     new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1884 nigel 67 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1885 ph10 172 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1886     new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1887 ph10 226 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1888 nigel 43
1889 nigel 79 #if !defined NOINFOCHECK
1890 nigel 43 old_count = pcre_info(re, &old_options, &old_first_char);
1891 nigel 3 if (count < 0) fprintf(outfile,
1892 nigel 43 "Error %d from pcre_info()\n", count);
1893 nigel 3 else
1894     {
1895 nigel 43 if (old_count != count) fprintf(outfile,
1896     "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1897     old_count);
1898 nigel 37
1899 nigel 43 if (old_first_char != first_char) fprintf(outfile,
1900     "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1901     first_char, old_first_char);
1902 nigel 37
1903 nigel 53 if (old_options != (int)get_options) fprintf(outfile,
1904     "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1905     get_options, old_options);
1906 nigel 43 }
1907 nigel 79 #endif
1908 nigel 43
1909 nigel 75 if (size != regex_gotten_store) fprintf(outfile,
1910 nigel 43 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1911 nigel 77 (int)size, (int)regex_gotten_store);
1912 nigel 43
1913     fprintf(outfile, "Capturing subpattern count = %d\n", count);
1914     if (backrefmax > 0)
1915     fprintf(outfile, "Max back reference = %d\n", backrefmax);
1916 nigel 63
1917     if (namecount > 0)
1918     {
1919     fprintf(outfile, "Named capturing subpatterns:\n");
1920     while (namecount-- > 0)
1921     {
1922     fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1923     nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1924     GET2(nametable, 0));
1925     nametable += nameentrysize;
1926     }
1927     }
1928 ph10 172
1929 ph10 169 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1930 ph10 227 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1931 nigel 63
1932 nigel 75 all_options = ((real_pcre *)re)->options;
1933 ph10 169 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1934 nigel 75
1935 nigel 53 if (get_options == 0) fprintf(outfile, "No options\n");
1936 ph10 576 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1937 nigel 53 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1938     ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1939     ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1940     ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1941 nigel 77 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1942 nigel 53 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1943 ph10 231 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1944     ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1945 nigel 53 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1946     ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1947     ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1948 nigel 87 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1949 nigel 71 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1950 ph10 518 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1951 nigel 91 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1952 ph10 576 ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
1953 nigel 91 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1954 ph10 172
1955 ph10 169 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1956 nigel 43
1957 nigel 93 switch (get_options & PCRE_NEWLINE_BITS)
1958 nigel 91 {
1959     case PCRE_NEWLINE_CR:
1960     fprintf(outfile, "Forced newline sequence: CR\n");
1961     break;
1962 nigel 43
1963 nigel 91 case PCRE_NEWLINE_LF:
1964     fprintf(outfile, "Forced newline sequence: LF\n");
1965     break;
1966    
1967     case PCRE_NEWLINE_CRLF:
1968     fprintf(outfile, "Forced newline sequence: CRLF\n");
1969     break;
1970    
1971 ph10 149 case PCRE_NEWLINE_ANYCRLF:
1972     fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1973     break;
1974    
1975 nigel 93 case PCRE_NEWLINE_ANY:
1976     fprintf(outfile, "Forced newline sequence: ANY\n");
1977     break;
1978    
1979 nigel 91 default:
1980     break;
1981     }
1982    
1983 nigel 43 if (first_char == -1)
1984     {
1985 nigel 91 fprintf(outfile, "First char at start or follows newline\n");
1986 nigel 43 }
1987     else if (first_char < 0)
1988     {
1989     fprintf(outfile, "No first char\n");
1990     }
1991     else
1992     {
1993 nigel 63 int ch = first_char & 255;
1994 nigel 67 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1995 nigel 63 "" : " (caseless)";
1996 nigel 93 if (PRINTHEX(ch))
1997 nigel 63 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1998 nigel 3 else
1999 nigel 63 fprintf(outfile, "First char = %d%s\n", ch, caseless);
2000 nigel 43 }
2001 nigel 37
2002 nigel 43 if (need_char < 0)
2003     {
2004     fprintf(outfile, "No need char\n");
2005 nigel 3 }
2006 nigel 43 else
2007     {
2008 nigel 63 int ch = need_char & 255;
2009 nigel 67 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2010 nigel 63 "" : " (caseless)";
2011 nigel 93 if (PRINTHEX(ch))
2012 nigel 63 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2013 nigel 43 else
2014 nigel 63 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2015 nigel 43 }
2016 nigel 75
2017     /* Don't output study size; at present it is in any case a fixed
2018     value, but it varies, depending on the computer architecture, and
2019     so messes up the test suite. (And with the /F option, it might be
2020     flipped.) */
2021    
2022     if (do_study)
2023     {
2024     if (extra == NULL)
2025     fprintf(outfile, "Study returned NULL\n");
2026     else
2027     {
2028     uschar *start_bits = NULL;
2029 ph10 455 int minlength;
2030 ph10 461
2031 ph10 455 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2032 ph10 461 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2033    
2034 nigel 75 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2035     if (start_bits == NULL)
2036 ph10 455 fprintf(outfile, "No set of starting bytes\n");
2037 nigel 75 else
2038     {
2039     int i;
2040     int c = 24;
2041     fprintf(outfile, "Starting byte set: ");
2042     for (i = 0; i < 256; i++)
2043     {
2044     if ((start_bits[i/8] & (1<<(i&7))) != 0)
2045     {
2046     if (c > 75)
2047     {
2048     fprintf(outfile, "\n ");
2049     c = 2;
2050     }
2051 nigel 93 if (PRINTHEX(i) && i != ' ')
2052 nigel 75 {
2053     fprintf(outfile, "%c ", i);
2054     c += 2;
2055     }
2056     else
2057     {
2058     fprintf(outfile, "\\x%02x ", i);
2059     c += 5;
2060     }
2061     }
2062     }
2063     fprintf(outfile, "\n");
2064     }
2065     }
2066     }
2067 nigel 3 }
2068    
2069 nigel 75 /* If the '>' option was present, we write out the regex to a file, and
2070     that is all. The first 8 bytes of the file are the regex length and then
2071     the study length, in big-endian order. */
2072 nigel 3
2073 nigel 75 if (to_file != NULL)
2074 nigel 3 {
2075 nigel 75 FILE *f = fopen((char *)to_file, "wb");
2076     if (f == NULL)
2077 nigel 3 {
2078 nigel 75 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2079 nigel 3 }
2080 nigel 75 else
2081     {
2082     uschar sbuf[8];
2083 ph10 255 sbuf[0] = (uschar)((true_size >> 24) & 255);
2084     sbuf[1] = (uschar)((true_size >> 16) & 255);
2085     sbuf[2] = (uschar)((true_size >> 8) & 255);
2086     sbuf[3] = (uschar)((true_size) & 255);
2087 ph10 259
2088 ph10 255 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2089     sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2090     sbuf[6] = (uschar)((true_study_size >> 8) & 255);
2091     sbuf[7] = (uschar)((true_study_size) & 255);
2092 nigel 3
2093 nigel 75 if (fwrite(sbuf, 1, 8, f) < 8 ||
2094     fwrite(re, 1, true_size, f) < true_size)
2095     {
2096     fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2097     }
2098 nigel 3 else
2099     {
2100 nigel 75 fprintf(outfile, "Compiled regex written to %s\n", to_file);
2101     if (extra != NULL)
2102 nigel 3 {
2103 nigel 75 if (fwrite(extra->study_data, 1, true_study_size, f) <
2104     true_study_size)
2105 nigel 3 {
2106 nigel 75 fprintf(outfile, "Write error on %s: %s\n", to_file,
2107     strerror(errno));
2108 nigel 3 }
2109 nigel 75 else fprintf(outfile, "Study data written to %s\n", to_file);
2110 nigel 93
2111 nigel 3 }
2112     }
2113 nigel 75 fclose(f);
2114 nigel 3 }
2115 nigel 77
2116     new_free(re);
2117     if (extra != NULL) new_free(extra);
2118 ph10 545 if (locale_set)
2119 ph10 541 {
2120     new_free((void *)tables);
2121     setlocale(LC_CTYPE, "C");
2122 ph10 545 locale_set = 0;
2123     }
2124 nigel 75 continue; /* With next regex */
2125 nigel 3 }
2126 nigel 75 } /* End of non-POSIX compile */
2127 nigel 3
2128     /* Read data lines and test them */
2129    
2130     for (;;)
2131     {
2132 nigel 87 uschar *q;
2133 ph10 147 uschar *bptr;
2134 nigel 57 int *use_offsets = offsets;
2135 nigel 53 int use_size_offsets = size_offsets;
2136 nigel 63 int callout_data = 0;
2137     int callout_data_set = 0;
2138 nigel 3 int count, c;
2139 nigel 29 int copystrings = 0;
2140 ph10 386 int find_match_limit = default_find_match_limit;
2141 nigel 29 int getstrings = 0;
2142     int getlist = 0;
2143 nigel 39 int gmatched = 0;
2144 nigel 35 int start_offset = 0;
2145 ph10 579 int start_offset_sign = 1;
2146 nigel 41 int g_notempty = 0;
2147 nigel 77 int use_dfa = 0;
2148 nigel 3
2149     options = 0;
2150    
2151 nigel 91 *copynames = 0;
2152     *getnames = 0;
2153    
2154     copynamesptr = copynames;
2155     getnamesptr = getnames;
2156    
2157 nigel 63 pcre_callout = callout;
2158     first_callout = 1;
2159     callout_extra = 0;
2160     callout_count = 0;
2161     callout_fail_count = 999999;
2162     callout_fail_id = -1;
2163 nigel 73 show_malloc = 0;
2164 nigel 63
2165 nigel 91 if (extra != NULL) extra->flags &=
2166     ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2167    
2168     len = 0;
2169     for (;;)
2170 nigel 11 {
2171 ph10 287 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2172 nigel 91 {
2173 ph10 537 if (len > 0) /* Reached EOF without hitting a newline */
2174     {
2175 ph10 545 fprintf(outfile, "\n");
2176 ph10 537 break;
2177 ph10 545 }
2178 nigel 91 done = 1;
2179     goto CONTINUE;
2180     }
2181     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2182     len = (int)strlen((char *)buffer);
2183     if (buffer[len-1] == '\n') break;
2184 nigel 11 }
2185 nigel 3
2186     while (len > 0 && isspace(buffer[len-1])) len--;
2187     buffer[len] = 0;
2188     if (len == 0) break;
2189    
2190     p = buffer;
2191     while (isspace(*p)) p++;
2192    
2193 ph10 147 bptr = q = dbuffer;
2194 nigel 3 while ((c = *p++) != 0)
2195     {
2196     int i = 0;
2197     int n = 0;
2198 nigel 63
2199 nigel 3 if (c == '\\') switch ((c = *p++))
2200     {
2201     case 'a': c = 7; break;
2202     case 'b': c = '\b'; break;
2203     case 'e': c = 27; break;
2204     case 'f': c = '\f'; break;
2205     case 'n': c = '\n'; break;
2206     case 'r': c = '\r'; break;
2207     case 't': c = '\t'; break;
2208     case 'v': c = '\v'; break;
2209    
2210     case '0': case '1': case '2': case '3':
2211     case '4': case '5': case '6': case '7':
2212     c -= '0';
2213     while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2214     c = c * 8 + *p++ - '0';
2215 nigel 91
2216     #if !defined NOUTF8
2217     if (use_utf8 && c > 255)
2218     {
2219     unsigned char buff8[8];
2220     int ii, utn;
2221     utn = ord2utf8(c, buff8);
2222     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2223     c = buff8[ii]; /* Last byte */
2224     }
2225     #endif
2226 nigel 3 break;
2227    
2228     case 'x':
2229 nigel 49
2230     /* Handle \x{..} specially - new Perl thing for utf8 */
2231    
2232 nigel 79 #if !defined NOUTF8
2233 nigel 49 if (*p == '{')
2234     {
2235     unsigned char *pt = p;
2236     c = 0;
2237     while (isxdigit(*(++pt)))
2238     c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2239     if (*pt == '}')
2240     {
2241 nigel 67 unsigned char buff8[8];
2242 nigel 49 int ii, utn;
2243 ph10 355 if (use_utf8)
2244 ph10 358 {
2245 ph10 355 utn = ord2utf8(c, buff8);
2246     for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2247     c = buff8[ii]; /* Last byte */
2248     }
2249     else
2250     {
2251 ph10 358 if (c > 255)
2252 ph10 355 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2253     "UTF-8 mode is not enabled.\n"
2254     "** Truncation will probably give the wrong result.\n", c);
2255 ph10 358 }
2256 nigel 49 p = pt + 1;
2257     break;
2258     }
2259     /* Not correct form; fall through */
2260     }
2261 nigel 79 #endif
2262 nigel 49
2263     /* Ordinary \x */
2264    
2265 nigel 3 c = 0;
2266     while (i++ < 2 && isxdigit(*p))
2267     {
2268     c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
2269     p++;
2270     }
2271     break;
2272    
2273 nigel 75 case 0: /* \ followed by EOF allows for an empty line */
2274 nigel 3 p--;
2275     continue;
2276    
2277 nigel 75 case '>':
2278 ph10 579 if (*p == '-')
2279 ph10 567 {
2280     start_offset_sign = -1;
2281     p++;
2282 ph10 579 }
2283 nigel 75 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2284 ph10 579 start_offset *= start_offset_sign;
2285 nigel 75 continue;
2286    
2287 nigel 3 case 'A': /* Option setting */
2288     options |= PCRE_ANCHORED;
2289     continue;
2290    
2291     case 'B':
2292     options |= PCRE_NOTBOL;
2293     continue;
2294    
2295 nigel 29 case 'C':
2296 nigel 63 if (isdigit(*p)) /* Set copy string */
2297     {
2298     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2299     copystrings |= 1 << n;
2300     }
2301     else if (isalnum(*p))
2302     {
2303 nigel 91 uschar *npp = copynamesptr;
2304 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2305 nigel 91 *npp++ = 0;
2306 nigel 67 *npp = 0;
2307 nigel 91 n = pcre_get_stringnumber(re, (char *)copynamesptr);
2308 nigel 63 if (n < 0)
2309 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2310     copynamesptr = npp;
2311 nigel 63 }
2312     else if (*p == '+')
2313     {
2314     callout_extra = 1;
2315     p++;
2316     }
2317     else if (*p == '-')
2318     {
2319     pcre_callout = NULL;
2320     p++;
2321     }
2322     else if (*p == '!')
2323     {
2324     callout_fail_id = 0;
2325     p++;
2326     while(isdigit(*p))
2327     callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2328     callout_fail_count = 0;
2329     if (*p == '!')
2330     {
2331     p++;
2332     while(isdigit(*p))
2333     callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2334     }
2335     }
2336     else if (*p == '*')
2337     {
2338     int sign = 1;
2339     callout_data = 0;
2340     if (*(++p) == '-') { sign = -1; p++; }
2341     while(isdigit(*p))
2342     callout_data = callout_data * 10 + *p++ - '0';
2343     callout_data *= sign;
2344     callout_data_set = 1;
2345     }
2346 nigel 29 continue;
2347    
2348 nigel 79 #if !defined NODFA
2349 nigel 77 case 'D':
2350 nigel 79 #if !defined NOPOSIX
2351 nigel 77 if (posix || do_posix)
2352     printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2353     else
2354 nigel 79 #endif
2355 nigel 77 use_dfa = 1;
2356     continue;
2357 ph10 553 #endif
2358 nigel 77
2359 ph10 553 #if !defined NODFA
2360 nigel 77 case 'F':
2361     options |= PCRE_DFA_SHORTEST;
2362     continue;
2363 nigel 79 #endif
2364 nigel 77
2365 nigel 29 case 'G':
2366 nigel 63 if (isdigit(*p))
2367     {
2368     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2369     getstrings |= 1 << n;
2370     }
2371     else if (isalnum(*p))
2372     {
2373 nigel 91 uschar *npp = getnamesptr;
2374 nigel 67 while (isalnum(*p)) *npp++ = *p++;
2375 nigel 91 *npp++ = 0;
2376 nigel 67 *npp = 0;
2377 nigel 91 n = pcre_get_stringnumber(re, (char *)getnamesptr);
2378 nigel 63 if (n < 0)
2379 nigel 91 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2380     getnamesptr = npp;
2381 nigel 63 }
2382 nigel 29 continue;
2383    
2384     case 'L':
2385     getlist = 1;
2386     continue;
2387    
2388 nigel 63 case 'M':
2389     find_match_limit = 1;
2390     continue;
2391    
2392 nigel 37 case 'N':
2393 ph10 442 if ((options & PCRE_NOTEMPTY) != 0)
2394     options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2395 ph10 461 else
2396 ph10 442 options |= PCRE_NOTEMPTY;
2397 nigel 37 continue;
2398    
2399 nigel 3 case 'O':
2400     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2401 nigel 53 if (n > size_offsets_max)
2402     {
2403     size_offsets_max = n;
2404 nigel 57 free(offsets);
2405 nigel 71 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2406 nigel 53 if (offsets == NULL)
2407     {
2408     printf("** Failed to get %d bytes of memory for offsets vector\n",
2409 ph10 151 (int)(size_offsets_max * sizeof(int)));
2410 nigel 77 yield = 1;
2411     goto EXIT;
2412 nigel 53 }
2413     }
2414     use_size_offsets = n;
2415 nigel 63 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2416 nigel 3 continue;
2417    
2418 nigel 75 case 'P':
2419 ph10 461 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2420 ph10 427 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2421 nigel 75 continue;
2422    
2423 nigel 91 case 'Q':
2424     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2425     if (extra == NULL)
2426     {
2427     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2428     extra->flags = 0;
2429     }
2430     extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2431     extra->match_limit_recursion = n;
2432     continue;
2433    
2434     case 'q':
2435     while(isdigit(*p)) n = n * 10 + *p++ - '0';
2436     if (extra == NULL)
2437     {
2438     extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2439     extra->flags = 0;
2440     }
2441     extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2442     extra->match_limit = n;
2443     continue;
2444    
2445 nigel 79 #if !defined NODFA
2446 nigel 77 case 'R':
2447     options |= PCRE_DFA_RESTART;
2448     continue;
2449 nigel 79 #endif
2450 nigel 77
2451 nigel 73 case 'S':
2452     show_malloc = 1;
2453     continue;
2454 ph10 392
2455 ph10 389 case 'Y':
2456     options |= PCRE_NO_START_OPTIMIZE;
2457 ph10 392 continue;
2458 nigel 73
2459 nigel 3 case 'Z':
2460     options |= PCRE_NOTEOL;
2461     continue;
2462 nigel 71
2463     case '?':
2464     options |= PCRE_NO_UTF8_CHECK;
2465     continue;
2466 nigel 91
2467     case '<':
2468     {
2469     int x = check_newline(p, outfile);
2470     if (x == 0) goto NEXT_DATA;
2471     options |= x;
2472     while (*p++ != '>');
2473     }
2474     continue;
2475 nigel 3 }
2476 nigel 9 *q++ = c;
2477 nigel 3 }
2478 nigel 9 *q = 0;
2479 ph10 530 len = (int)(q - dbuffer);
2480 ph10 545
2481 ph10 361 /* Move the data to the end of the buffer so that a read over the end of
2482 ph10 371 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2483 ph10 363 we are using the POSIX interface, we must include the terminating zero. */
2484 ph10 371
2485 ph10 363 #if !defined NOPOSIX
2486     if (posix || do_posix)
2487     {
2488     memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2489 ph10 371 bptr += buffer_size - len - 1;
2490 ph10 363 }
2491 ph10 371 else
2492     #endif
2493 ph10 363 {
2494     memmove(bptr + buffer_size - len, bptr, len);
2495 ph10 371 bptr += buffer_size - len;
2496     }
2497 nigel 3
2498 nigel 77 if ((all_use_dfa || use_dfa) && find_match_limit)
2499     {
2500     printf("**Match limit not relevant for DFA matching: ignored\n");
2501     find_match_limit = 0;
2502     }
2503    
2504 nigel 3 /* Handle matching via the POSIX interface, which does not
2505 nigel 63 support timing or playing with the match limit or callout data. */
2506 nigel 3
2507 nigel 37 #if !defined NOPOSIX
2508 nigel 3 if (posix || do_posix)
2509     {
2510     int rc;
2511     int eflags = 0;
2512 nigel 63 regmatch_t *pmatch = NULL;
2513     if (use_size_offsets > 0)
2514 nigel 71 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2515 nigel 3 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2516     if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2517 ph10 392 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2518 nigel 3
2519 nigel 53 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2520 nigel 3
2521     if (rc != 0)
2522     {
2523 nigel 91 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2524 nigel 3 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2525     }
2526 nigel 87 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2527     != 0)
2528     {
2529     fprintf(outfile, "Matched with REG_NOSUB\n");
2530     }
2531 nigel 3 else
2532     {
2533 nigel 7 size_t i;
2534 nigel 63 for (i = 0; i < (size_t)use_size_offsets; i++)
2535 nigel 3 {
2536     if (pmatch[i].rm_so >= 0)
2537     {
2538 nigel 23 fprintf(outfile, "%2d: ", (int)i);
2539 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_so,
2540     pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2541 nigel 3 fprintf(outfile, "\n");
2542 nigel 35 if (i == 0 && do_showrest)
2543     {
2544     fprintf(outfile, " 0+ ");
2545 nigel 63 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2546     outfile);
2547 nigel 35 fprintf(outfile, "\n");
2548     }
2549 nigel 3 }
2550     }
2551     }
2552 nigel 53 free(pmatch);
2553 nigel 3 }
2554    
2555 nigel 35 /* Handle matching via the native interface - repeats for /g and /G */
2556 nigel 3
2557 nigel 37 else
2558     #endif /* !defined NOPOSIX */
2559    
2560 nigel 39 for (;; gmatched++) /* Loop for /g or /G */
2561 nigel 3 {
2562 ph10 512 markptr = NULL;
2563    
2564 nigel 93 if (timeitm > 0)
2565 nigel 3 {
2566     register int i;
2567     clock_t time_taken;
2568     clock_t start_time = clock();
2569 nigel 77
2570 nigel 79 #if !defined NODFA
2571 nigel 77 if (all_use_dfa || use_dfa)
2572     {
2573     int workspace[1000];
2574 nigel 93 for (i = 0; i < timeitm; i++)
2575 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2576 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2577     sizeof(workspace)/sizeof(int));
2578     }
2579     else
2580 nigel 79 #endif
2581 nigel 77
2582 nigel 93 for (i = 0; i < timeitm; i++)
2583 nigel 35 count = pcre_exec(re, extra, (char *)bptr, len,
2584 nigel 57 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2585 nigel 77
2586 nigel 3 time_taken = clock() - start_time;
2587 nigel 93 fprintf(outfile, "Execute time %.4f milliseconds\n",
2588     (((double)time_taken * 1000.0) / (double)timeitm) /
2589 nigel 63 (double)CLOCKS_PER_SEC);
2590 nigel 3 }
2591    
2592 nigel 63 /* If find_match_limit is set, we want to do repeated matches with
2593 nigel 87 varying limits in order to find the minimum value for the match limit and
2594     for the recursion limit. */
2595 nigel 63
2596     if (find_match_limit)
2597     {
2598     if (extra == NULL)
2599     {
2600 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2601 nigel 63 extra->flags = 0;
2602     }
2603    
2604 nigel 91 (void)check_match_limit(re, extra, bptr, len, start_offset,
2605 nigel 87 options|g_notempty, use_offsets, use_size_offsets,
2606     PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2607     PCRE_ERROR_MATCHLIMIT, "match()");
2608 nigel 63
2609 nigel 87 count = check_match_limit(re, extra, bptr, len, start_offset,
2610     options|g_notempty, use_offsets, use_size_offsets,
2611     PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2612     PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2613 nigel 63 }
2614    
2615     /* If callout_data is set, use the interface with additional data */
2616    
2617     else if (callout_data_set)
2618     {
2619     if (extra == NULL)
2620     {
2621 nigel 71 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2622 nigel 63 extra->flags = 0;
2623     }
2624     extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2625 nigel 71 extra->callout_data = &callout_data;
2626 nigel 63 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2627     options | g_notempty, use_offsets, use_size_offsets);
2628     extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2629     }
2630    
2631     /* The normal case is just to do the match once, with the default
2632     value of match_limit. */
2633    
2634 nigel 79 #if !defined NODFA
2635 nigel 77 else if (all_use_dfa || use_dfa)
2636     {
2637     int workspace[1000];
2638 ph10 455 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2639 nigel 77 options | g_notempty, use_offsets, use_size_offsets, workspace,
2640     sizeof(workspace)/sizeof(int));
2641     if (count == 0)
2642     {
2643     fprintf(outfile, "Matched, but too many subsidiary matches\n");
2644     count = use_size_offsets/2;
2645     }
2646     }
2647 nigel 79 #endif
2648 nigel 77
2649 nigel 75 else
2650     {
2651     count = pcre_exec(re, extra, (char *)bptr, len,
2652     start_offset, options | g_notempty, use_offsets, use_size_offsets);
2653 nigel 77 if (count == 0)
2654     {
2655     fprintf(outfile, "Matched, but too many substrings\n");
2656     count = use_size_offsets/3;
2657     }
2658 nigel 75 }
2659 nigel 3
2660 nigel 39 /* Matched */
2661    
2662 nigel 3 if (count >= 0)
2663     {
2664 nigel 93 int i, maxcount;
2665    
2666     #if !defined NODFA
2667     if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2668     #endif
2669     maxcount = use_size_offsets/3;
2670    
2671     /* This is a check against a lunatic return value. */
2672    
2673     if (count > maxcount)
2674     {
2675     fprintf(outfile,
2676     "** PCRE error: returned count %d is too big for offset size %d\n",
2677     count, use_size_offsets);
2678     count = use_size_offsets/3;
2679     if (do_g || do_G)
2680     {
2681     fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2682     do_g = do_G = FALSE; /* Break g/G loop */
2683     }
2684     }
2685    
2686 nigel 29 for (i = 0; i < count * 2; i += 2)
2687 nigel 3 {
2688 nigel 57 if (use_offsets[i] < 0)
2689 nigel 3 fprintf(outfile, "%2d: <unset>\n", i/2);
2690     else
2691     {
2692     fprintf(outfile, "%2d: ", i/2);
2693 nigel 63 (void)pchars(bptr + use_offsets[i],
2694     use_offsets[i+1] - use_offsets[i], outfile);
2695 nigel 3 fprintf(outfile, "\n");
2696 nigel 35 if (i == 0)
2697     {
2698     if (do_showrest)
2699     {
2700     fprintf(outfile, " 0+ ");
2701 nigel 63 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2702     outfile);
2703 nigel 35 fprintf(outfile, "\n");
2704     }
2705     }
2706 nigel 3 }
2707     }
2708 ph10 512
2709 ph10 510 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2710 nigel 29
2711     for (i = 0; i < 32; i++)
2712     {
2713     if ((copystrings & (1 << i)) != 0)
2714     {
2715 nigel 91 char copybuffer[256];
2716 nigel 57 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2717 nigel 37 i, copybuffer, sizeof(copybuffer));
2718 nigel 29 if (rc < 0)
2719     fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2720     else
2721 nigel 37 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2722 nigel 29 }
2723     }
2724    
2725 nigel 91 for (copynamesptr = copynames;
2726     *copynamesptr != 0;
2727     copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2728     {
2729     char copybuffer[256];
2730     int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2731     count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2732     if (rc < 0)
2733     fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2734     else
2735     fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2736     }
2737    
2738 nigel 29 for (i = 0; i < 32; i++)
2739     {
2740     if ((getstrings & (1 << i)) != 0)
2741     {
2742     const char *substring;
2743 nigel 57 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2744 nigel 29 i, &substring);
2745     if (rc < 0)
2746     fprintf(outfile, "get substring %d failed %d\n", i, rc);
2747     else
2748     {
2749     fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2750 nigel 49 pcre_free_substring(substring);
2751 nigel 29 }
2752     }
2753     }
2754    
2755 nigel 91 for (getnamesptr = getnames;
2756     *getnamesptr != 0;
2757     getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2758     {
2759     const char *substring;
2760     int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2761     count, (char *)getnamesptr, &substring);
2762     if (rc < 0)
2763     fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2764     else
2765     {
2766     fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2767     pcre_free_substring(substring);
2768     }
2769     }
2770    
2771 nigel 29 if (getlist)
2772     {
2773     const char **stringlist;
2774 nigel 57 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2775 nigel 29 &stringlist);
2776     if (rc < 0)
2777     fprintf(outfile, "get substring list failed %d\n", rc);
2778     else
2779     {
2780     for (i = 0; i < count; i++)
2781     fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2782     if (stringlist[i] != NULL)
2783     fprintf(outfile, "string list not terminated by NULL\n");
2784 nigel 49 /* free((void *)stringlist); */
2785     pcre_free_substring_list(stringlist);
2786 nigel 29 }
2787     }
2788 nigel 39 }
2789 nigel 29
2790 nigel 75 /* There was a partial match */
2791    
2792     else if (count == PCRE_ERROR_PARTIAL)
2793     {
2794 ph10 510 if (markptr == NULL) fprintf(outfile, "Partial match");
2795     else fprintf(outfile, "Partial match, mark=%s", markptr);
2796 ph10 426 if (use_size_offsets > 1)
2797     {
2798     fprintf(outfile, ": ");
2799     pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2800 ph10 461 outfile);
2801     }
2802 nigel 77 fprintf(outfile, "\n");
2803 nigel 75 break; /* Out of the /g loop */
2804     }
2805    
2806 nigel 41 /* Failed to match. If this is a /g or /G loop and we previously set
2807 ph10 143 g_notempty after a null match, this is not necessarily the end. We want
2808     to advance the start offset, and continue. We won't be at the end of the
2809     string - that was checked before setting g_notempty.
2810 nigel 39
2811 ph10 566 Complication arises in the case when the newline convention is "any",
2812 ph10 579 "crlf", or "anycrlf". If the previous match was at the end of a line
2813     terminated by CRLF, an advance of one character just passes the \r,
2814 ph10 566 whereas we should prefer the longer newline sequence, as does the code in
2815 ph10 579 pcre_exec(). Fudge the offset value to achieve this. We check for a
2816     newline setting in the pattern; if none was set, use pcre_config() to
2817 ph10 566 find the default.
2818 ph10 144
2819 ph10 143 Otherwise, in the case of UTF-8 matching, the advance must be one
2820     character, not one byte. */
2821    
2822 nigel 3 else
2823     {
2824 nigel 41 if (g_notempty != 0)
2825 nigel 35 {
2826 nigel 73 int onechar = 1;
2827 ph10 146 unsigned int obits = ((real_pcre *)re)->options;
2828 nigel 57 use_offsets[0] = start_offset;
2829 ph10 146 if ((obits & PCRE_NEWLINE_BITS) == 0)
2830     {
2831     int d;
2832     (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2833 ph10 391 /* Note that these values are always the ASCII ones, even in
2834     EBCDIC environments. CR = 13, NL = 10. */
2835     obits = (d == 13)? PCRE_NEWLINE_CR :
2836     (d == 10)? PCRE_NEWLINE_LF :
2837     (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2838 ph10 150 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2839 ph10 146 (d == -1)? PCRE_NEWLINE_ANY : 0;
2840     }
2841 ph10 149 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2842 ph10 566 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2843 ph10 150 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2844 ph10 149 &&
2845 ph10 143 start_offset < len - 1 &&
2846     bptr[start_offset] == '\r' &&
2847     bptr[start_offset+1] == '\n')
2848 ph10 144 onechar++;
2849 ph10 143 else if (use_utf8)
2850 nigel 73 {
2851     while (start_offset + onechar < len)
2852     {
2853 ph10 566 if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2854 ph10 579 onechar++;
2855 nigel 73 }
2856     }
2857     use_offsets[1] = start_offset + onechar;
2858 nigel 35 }
2859 nigel 41 else
2860     {
2861 nigel 73 if (count == PCRE_ERROR_NOMATCH)
2862 nigel 41 {
2863 ph10 512 if (gmatched == 0)
2864 ph10 510 {
2865     if (markptr == NULL) fprintf(outfile, "No match\n");
2866     else fprintf(outfile, "No match, mark = %s\n", markptr);
2867 ph10 512 }
2868 nigel 41 }
2869 nigel 73 else fprintf(outfile, "Error %d\n", count);
2870 nigel 41 break; /* Out of the /g loop */
2871     }
2872 nigel 3 }
2873 nigel 35
2874 nigel 39 /* If not /g or /G we are done */
2875    
2876     if (!do_g && !do_G) break;
2877    
2878 nigel 41 /* If we have matched an empty string, first check to see if we are at
2879 ph10 442 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2880     Perl's /g option does. This turns out to be rather cunning. First we set
2881     PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2882 nigel 47 same point. If this fails (picked up above) we advance to the next
2883 ph10 143 character. */
2884 ph10 142
2885 nigel 41 g_notempty = 0;
2886 ph10 142
2887 nigel 57 if (use_offsets[0] == use_offsets[1])
2888 nigel 41 {
2889 nigel 57 if (use_offsets[0] == len) break;
2890 ph10 442 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2891 nigel 41 }
2892 nigel 39
2893     /* For /g, update the start offset, leaving the rest alone */
2894    
2895 ph10 143 if (do_g) start_offset = use_offsets[1];
2896 nigel 39
2897     /* For /G, update the pointer and length */
2898    
2899     else
2900 nigel 35 {
2901 ph10 143 bptr += use_offsets[1];
2902     len -= use_offsets[1];
2903 nigel 35 }
2904 nigel 39 } /* End of loop for /g and /G */
2905 nigel 91
2906     NEXT_DATA: continue;
2907 nigel 39 } /* End of loop for data lines */
2908 nigel 3
2909 nigel 11 CONTINUE:
2910 nigel 37
2911     #if !defined NOPOSIX
2912 nigel 3 if (posix || do_posix) regfree(&preg);
2913 nigel 37 #endif
2914    
2915 nigel 77 if (re != NULL) new_free(re);
2916     if (extra != NULL) new_free(extra);
2917 ph10 541 if (locale_set)
2918 nigel 25 {
2919 nigel 77 new_free((void *)tables);
2920 nigel 25 setlocale(LC_CTYPE, "C");
2921 nigel 93 locale_set = 0;
2922 nigel 25 }
2923 nigel 3 }
2924    
2925 nigel 73 if (infile == stdin) fprintf(outfile, "\n");
2926 nigel 77
2927     EXIT:
2928    
2929     if (infile != NULL && infile != stdin) fclose(infile);
2930     if (outfile != NULL && outfile != stdout) fclose(outfile);
2931    
2932     free(buffer);
2933     free(dbuffer);
2934     free(pbuffer);
2935     free(offsets);
2936    
2937     return yield;
2938 nigel 3 }
2939    
2940 nigel 77 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12