/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 25 by nigel, Sat Feb 24 21:38:45 2007 UTC revision 567 by ph10, Sat Nov 6 17:10:00 2010 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60  /* Use the internal info for displaying the results of pcre_study(). */  /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    #else
83    #include <sys/time.h>          /* These two includes are needed */
84    #include <sys/resource.h>      /* for setrlimit(). */
85    #define INPUT_MODE   "rb"
86    #define OUTPUT_MODE  "wb"
87    #endif
88    
89  #include "internal.h"  
90    /* We have to include pcre_internal.h because we need the internal info for
91    displaying the results of pcre_study() and we also need to know about the
92    internal macros, structures, and other internal data values; pcretest has
93    "inside information" compared to a program that strictly follows the PCRE API.
94    
95    Although pcre_internal.h does itself include pcre.h, we explicitly include it
96    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97    appropriately for an application, not for building PCRE. */
98    
99    #include "pcre.h"
100    #include "pcre_internal.h"
101    
102    /* We need access to some of the data tables that PCRE uses. So as not to have
103    to keep two copies, we include the source file here, changing the names of the
104    external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107    #define _pcre_utf8_table1      utf8_table1
108    #define _pcre_utf8_table1_size utf8_table1_size
109    #define _pcre_utf8_table2      utf8_table2
110    #define _pcre_utf8_table3      utf8_table3
111    #define _pcre_utf8_table4      utf8_table4
112    #define _pcre_utt              utt
113    #define _pcre_utt_size         utt_size
114    #define _pcre_utt_names        utt_names
115    #define _pcre_OP_lengths       OP_lengths
116    
117    #include "pcre_tables.c"
118    
119    /* We also need the pcre_printint() function for printing out compiled
120    patterns. This function is in a separate file so that it can be included in
121    pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
123    
124    #define COMPILING_PCRETEST
125    #include "pcre_printint.src"
126    
127    /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134    
135    /* It is possible to compile this test program without including support for
136    testing the POSIX interface, though this is not available via the standard
137    Makefile. */
138    
139    #if !defined NOPOSIX
140  #include "pcreposix.h"  #include "pcreposix.h"
141    #endif
142    
143    /* It is also possible, for the benefit of the version currently imported into
144    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145    interface to the DFA matcher (NODFA), and without the doublecheck of the old
146    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147    UTF8 support if PCRE is built without it. */
148    
149    #ifndef SUPPORT_UTF8
150    #ifndef NOUTF8
151    #define NOUTF8
152    #endif
153    #endif
154    
155    
156    /* Other parameters */
157    
158  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
159  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 163 
163  #endif  #endif
164  #endif  #endif
165    
166  #define LOOPREPEAT 10000  /* This is the default loop count for timing. */
167    
168    #define LOOPREPEAT 500000
169    
170    /* Static variables */
171    
172  static FILE *outfile;  static FILE *outfile;
173  static int log_store = 0;  static int log_store = 0;
174    static int callout_count;
175    static int callout_extra;
176    static int callout_fail_count;
177    static int callout_fail_id;
178    static int debug_lengths;
179    static int first_callout;
180    static int locale_set = 0;
181    static int show_malloc;
182    static int use_utf8;
183    static size_t gotten_store;
184    
185    /* The buffers grow automatically if very long input lines are encountered. */
186    
187    static int buffer_size = 50000;
188    static uschar *buffer = NULL;
189    static uschar *dbuffer = NULL;
190    static uschar *pbuffer = NULL;
191    
192    
193    /*************************************************
194    *         Alternate character tables             *
195    *************************************************/
196    
197  /* Debugging function to print the internal form of the regex. This is the same  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198  code as contained in pcre.c under the DEBUG macro. */  using the default tables of the library. However, the T option can be used to
199    select alternate sets of tables, for different kinds of testing. Note also that
200  static const char *OP_names[] = {  the L (locale) option also adjusts the tables. */
201    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
202    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  /* This is the set of tables distributed as default with PCRE. It recognizes
203    "Opt", "^", "$", "Any", "chars", "not",  only ASCII characters. */
204    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
205    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  static const unsigned char tables0[] = {
206    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
207    "*", "*?", "+", "+?", "?", "??", "{", "{",  /* This table is a lower casing table. */
208    "class", "Ref",  
209    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",      0,  1,  2,  3,  4,  5,  6,  7,
210    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",      8,  9, 10, 11, 12, 13, 14, 15,
211    "Brazero", "Braminzero", "Bra"     16, 17, 18, 19, 20, 21, 22, 23,
212       24, 25, 26, 27, 28, 29, 30, 31,
213       32, 33, 34, 35, 36, 37, 38, 39,
214       40, 41, 42, 43, 44, 45, 46, 47,
215       48, 49, 50, 51, 52, 53, 54, 55,
216       56, 57, 58, 59, 60, 61, 62, 63,
217       64, 97, 98, 99,100,101,102,103,
218      104,105,106,107,108,109,110,111,
219      112,113,114,115,116,117,118,119,
220      120,121,122, 91, 92, 93, 94, 95,
221       96, 97, 98, 99,100,101,102,103,
222      104,105,106,107,108,109,110,111,
223      112,113,114,115,116,117,118,119,
224      120,121,122,123,124,125,126,127,
225      128,129,130,131,132,133,134,135,
226      136,137,138,139,140,141,142,143,
227      144,145,146,147,148,149,150,151,
228      152,153,154,155,156,157,158,159,
229      160,161,162,163,164,165,166,167,
230      168,169,170,171,172,173,174,175,
231      176,177,178,179,180,181,182,183,
232      184,185,186,187,188,189,190,191,
233      192,193,194,195,196,197,198,199,
234      200,201,202,203,204,205,206,207,
235      208,209,210,211,212,213,214,215,
236      216,217,218,219,220,221,222,223,
237      224,225,226,227,228,229,230,231,
238      232,233,234,235,236,237,238,239,
239      240,241,242,243,244,245,246,247,
240      248,249,250,251,252,253,254,255,
241    
242    /* This table is a case flipping table. */
243    
244        0,  1,  2,  3,  4,  5,  6,  7,
245        8,  9, 10, 11, 12, 13, 14, 15,
246       16, 17, 18, 19, 20, 21, 22, 23,
247       24, 25, 26, 27, 28, 29, 30, 31,
248       32, 33, 34, 35, 36, 37, 38, 39,
249       40, 41, 42, 43, 44, 45, 46, 47,
250       48, 49, 50, 51, 52, 53, 54, 55,
251       56, 57, 58, 59, 60, 61, 62, 63,
252       64, 97, 98, 99,100,101,102,103,
253      104,105,106,107,108,109,110,111,
254      112,113,114,115,116,117,118,119,
255      120,121,122, 91, 92, 93, 94, 95,
256       96, 65, 66, 67, 68, 69, 70, 71,
257       72, 73, 74, 75, 76, 77, 78, 79,
258       80, 81, 82, 83, 84, 85, 86, 87,
259       88, 89, 90,123,124,125,126,127,
260      128,129,130,131,132,133,134,135,
261      136,137,138,139,140,141,142,143,
262      144,145,146,147,148,149,150,151,
263      152,153,154,155,156,157,158,159,
264      160,161,162,163,164,165,166,167,
265      168,169,170,171,172,173,174,175,
266      176,177,178,179,180,181,182,183,
267      184,185,186,187,188,189,190,191,
268      192,193,194,195,196,197,198,199,
269      200,201,202,203,204,205,206,207,
270      208,209,210,211,212,213,214,215,
271      216,217,218,219,220,221,222,223,
272      224,225,226,227,228,229,230,231,
273      232,233,234,235,236,237,238,239,
274      240,241,242,243,244,245,246,247,
275      248,249,250,251,252,253,254,255,
276    
277    /* This table contains bit maps for various character classes. Each map is 32
278    bytes long and the bits run from the least significant end of each byte. The
279    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280    graph, print, punct, and cntrl. Other classes are built from combinations. */
281    
282      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286    
287      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291    
292      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296    
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301    
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306    
307      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311    
312      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316    
317      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321    
322      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326    
327      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332    /* This table identifies various classes of character by individual bits:
333      0x01   white space character
334      0x02   letter
335      0x04   decimal digit
336      0x08   hexadecimal digit
337      0x10   alphanumeric or '_'
338      0x80   regular expression metacharacter or binary zero
339    */
340    
341      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
342      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
345      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
346      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
347      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
348      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
349      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
350      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
351      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
352      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
353      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
354      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
355      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
356      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373    
374    /* This is a set of tables that came originally from a Windows user. It seems to
375    be at least an approximation of ISO 8859. In particular, there are characters
376    greater than 128 that are marked as spaces, letters, etc. */
377    
378    static const unsigned char tables1[] = {
379    0,1,2,3,4,5,6,7,
380    8,9,10,11,12,13,14,15,
381    16,17,18,19,20,21,22,23,
382    24,25,26,27,28,29,30,31,
383    32,33,34,35,36,37,38,39,
384    40,41,42,43,44,45,46,47,
385    48,49,50,51,52,53,54,55,
386    56,57,58,59,60,61,62,63,
387    64,97,98,99,100,101,102,103,
388    104,105,106,107,108,109,110,111,
389    112,113,114,115,116,117,118,119,
390    120,121,122,91,92,93,94,95,
391    96,97,98,99,100,101,102,103,
392    104,105,106,107,108,109,110,111,
393    112,113,114,115,116,117,118,119,
394    120,121,122,123,124,125,126,127,
395    128,129,130,131,132,133,134,135,
396    136,137,138,139,140,141,142,143,
397    144,145,146,147,148,149,150,151,
398    152,153,154,155,156,157,158,159,
399    160,161,162,163,164,165,166,167,
400    168,169,170,171,172,173,174,175,
401    176,177,178,179,180,181,182,183,
402    184,185,186,187,188,189,190,191,
403    224,225,226,227,228,229,230,231,
404    232,233,234,235,236,237,238,239,
405    240,241,242,243,244,245,246,215,
406    248,249,250,251,252,253,254,223,
407    224,225,226,227,228,229,230,231,
408    232,233,234,235,236,237,238,239,
409    240,241,242,243,244,245,246,247,
410    248,249,250,251,252,253,254,255,
411    0,1,2,3,4,5,6,7,
412    8,9,10,11,12,13,14,15,
413    16,17,18,19,20,21,22,23,
414    24,25,26,27,28,29,30,31,
415    32,33,34,35,36,37,38,39,
416    40,41,42,43,44,45,46,47,
417    48,49,50,51,52,53,54,55,
418    56,57,58,59,60,61,62,63,
419    64,97,98,99,100,101,102,103,
420    104,105,106,107,108,109,110,111,
421    112,113,114,115,116,117,118,119,
422    120,121,122,91,92,93,94,95,
423    96,65,66,67,68,69,70,71,
424    72,73,74,75,76,77,78,79,
425    80,81,82,83,84,85,86,87,
426    88,89,90,123,124,125,126,127,
427    128,129,130,131,132,133,134,135,
428    136,137,138,139,140,141,142,143,
429    144,145,146,147,148,149,150,151,
430    152,153,154,155,156,157,158,159,
431    160,161,162,163,164,165,166,167,
432    168,169,170,171,172,173,174,175,
433    176,177,178,179,180,181,182,183,
434    184,185,186,187,188,189,190,191,
435    224,225,226,227,228,229,230,231,
436    232,233,234,235,236,237,238,239,
437    240,241,242,243,244,245,246,215,
438    248,249,250,251,252,253,254,223,
439    192,193,194,195,196,197,198,199,
440    200,201,202,203,204,205,206,207,
441    208,209,210,211,212,213,214,247,
442    216,217,218,219,220,221,222,255,
443    0,62,0,0,1,0,0,0,
444    0,0,0,0,0,0,0,0,
445    32,0,0,0,1,0,0,0,
446    0,0,0,0,0,0,0,0,
447    0,0,0,0,0,0,255,3,
448    126,0,0,0,126,0,0,0,
449    0,0,0,0,0,0,0,0,
450    0,0,0,0,0,0,0,0,
451    0,0,0,0,0,0,255,3,
452    0,0,0,0,0,0,0,0,
453    0,0,0,0,0,0,12,2,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,0,0,
456    254,255,255,7,0,0,0,0,
457    0,0,0,0,0,0,0,0,
458    255,255,127,127,0,0,0,0,
459    0,0,0,0,0,0,0,0,
460    0,0,0,0,254,255,255,7,
461    0,0,0,0,0,4,32,4,
462    0,0,0,128,255,255,127,255,
463    0,0,0,0,0,0,255,3,
464    254,255,255,135,254,255,255,7,
465    0,0,0,0,0,4,44,6,
466    255,255,127,255,255,255,127,255,
467    0,0,0,0,254,255,255,255,
468    255,255,255,255,255,255,255,127,
469    0,0,0,0,254,255,255,255,
470    255,255,255,255,255,255,255,255,
471    0,2,0,0,255,255,255,255,
472    255,255,255,255,255,255,255,127,
473    0,0,0,0,255,255,255,255,
474    255,255,255,255,255,255,255,255,
475    0,0,0,0,254,255,0,252,
476    1,0,0,248,1,0,0,120,
477    0,0,0,0,254,255,255,255,
478    0,0,128,0,0,0,128,0,
479    255,255,255,255,0,0,0,0,
480    0,0,0,0,0,0,0,128,
481    255,255,255,255,0,0,0,0,
482    0,0,0,0,0,0,0,0,
483    128,0,0,0,0,0,0,0,
484    0,1,1,0,1,1,0,0,
485    0,0,0,0,0,0,0,0,
486    0,0,0,0,0,0,0,0,
487    1,0,0,0,128,0,0,0,
488    128,128,128,128,0,0,128,0,
489    28,28,28,28,28,28,28,28,
490    28,28,0,0,0,0,0,128,
491    0,26,26,26,26,26,26,18,
492    18,18,18,18,18,18,18,18,
493    18,18,18,18,18,18,18,18,
494    18,18,18,128,128,0,128,16,
495    0,26,26,26,26,26,26,18,
496    18,18,18,18,18,18,18,18,
497    18,18,18,18,18,18,18,18,
498    18,18,18,128,128,0,0,0,
499    0,0,0,0,0,1,0,0,
500    0,0,0,0,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    1,0,0,0,0,0,0,0,
504    0,0,18,0,0,0,0,0,
505    0,0,20,20,0,18,0,0,
506    0,20,18,0,0,0,0,0,
507    18,18,18,18,18,18,18,18,
508    18,18,18,18,18,18,18,18,
509    18,18,18,18,18,18,18,0,
510    18,18,18,18,18,18,18,18,
511    18,18,18,18,18,18,18,18,
512    18,18,18,18,18,18,18,18,
513    18,18,18,18,18,18,18,0,
514    18,18,18,18,18,18,18,18
515  };  };
516    
517    
518  static void print_internals(pcre *re, FILE *outfile)  
519    
520    #ifndef HAVE_STRERROR
521    /*************************************************
522    *     Provide strerror() for non-ANSI libraries  *
523    *************************************************/
524    
525    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
526    in their libraries, but can provide the same facility by this simple
527    alternative function. */
528    
529    extern int   sys_nerr;
530    extern char *sys_errlist[];
531    
532    char *
533    strerror(int n)
534  {  {
535  unsigned char *code = ((real_pcre *)re)->code;  if (n < 0 || n >= sys_nerr) return "unknown error number";
536    return sys_errlist[n];
537    }
538    #endif /* HAVE_STRERROR */
539    
540    
 fprintf(outfile, "------------------------------------------------------------------\n");  
541    
542  for(;;)  
543    /*************************************************
544    *        Read or extend an input line            *
545    *************************************************/
546    
547    /* Input lines are read into buffer, but both patterns and data lines can be
548    continued over multiple input lines. In addition, if the buffer fills up, we
549    want to automatically expand it so as to be able to handle extremely large
550    lines that are needed for certain stress tests. When the input buffer is
551    expanded, the other two buffers must also be expanded likewise, and the
552    contents of pbuffer, which are a copy of the input for callouts, must be
553    preserved (for when expansion happens for a data line). This is not the most
554    efficient way of handling this, but hey, this is just a test program!
555    
556    Arguments:
557      f            the file to read
558      start        where in buffer to start (this *must* be within buffer)
559      prompt       for stdin or readline()
560    
561    Returns:       pointer to the start of new data
562                   could be a copy of start, or could be moved
563                   NULL if no data read and EOF reached
564    */
565    
566    static uschar *
567    extend_inputline(FILE *f, uschar *start, const char *prompt)
568    {
569    uschar *here = start;
570    
571    for (;;)
572    {    {
573    int c;    int rlen = (int)(buffer_size - (here - buffer));
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
574    
575          case OP_CRRANGE:    if (rlen > 1000)
576          case OP_CRMINRANGE:      {
577          min = (code[1] << 8) + code[2];      int dlen;
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
578    
579          default:      /* If libreadline support is required, use readline() to read a line if the
580          code--;      input is a terminal. Note that readline() removes the trailing newline, so
581          }      we must put it back again, to be compatible with fgets(). */
582    
583    #ifdef SUPPORT_LIBREADLINE
584        if (isatty(fileno(f)))
585          {
586          size_t len;
587          char *s = readline(prompt);
588          if (s == NULL) return (here == start)? NULL : start;
589          len = strlen(s);
590          if (len > 0) add_history(s);
591          if (len > rlen - 1) len = rlen - 1;
592          memcpy(here, s, len);
593          here[len] = '\n';
594          here[len+1] = 0;
595          free(s);
596        }        }
597      break;      else
598    #endif
599    
600      /* Anything else is just a one-node item */      /* Read the next line by normal means, prompting if the file is stdin. */
601    
602      default:        {
603      fprintf(outfile, "    %s", OP_names[*code]);        if (f == stdin) printf("%s", prompt);
604      break;        if (fgets((char *)here, rlen,  f) == NULL)
605            return (here == start)? NULL : start;
606          }
607    
608        dlen = (int)strlen((char *)here);
609        if (dlen > 0 && here[dlen - 1] == '\n') return start;
610        here += dlen;
611      }      }
612    
613    code++;    else
614    fprintf(outfile, "\n");      {
615        int new_buffer_size = 2*buffer_size;
616        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
617        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
618        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
619    
620        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
621          {
622          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
623          exit(1);
624          }
625    
626        memcpy(new_buffer, buffer, buffer_size);
627        memcpy(new_pbuffer, pbuffer, buffer_size);
628    
629        buffer_size = new_buffer_size;
630    
631        start = new_buffer + (start - buffer);
632        here = new_buffer + (here - buffer);
633    
634        free(buffer);
635        free(dbuffer);
636        free(pbuffer);
637    
638        buffer = new_buffer;
639        dbuffer = new_dbuffer;
640        pbuffer = new_pbuffer;
641        }
642    }    }
643    
644    return NULL;  /* Control never gets here */
645  }  }
646    
647    
648    
 /* Character string printing function. */  
649    
650  static void pchars(unsigned char *p, int length)  
651    
652    
653    /*************************************************
654    *          Read number from string               *
655    *************************************************/
656    
657    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
658    around with conditional compilation, just do the job by hand. It is only used
659    for unpicking arguments, so just keep it simple.
660    
661    Arguments:
662      str           string to be converted
663      endptr        where to put the end pointer
664    
665    Returns:        the converted value, as an int
666    */
667    
668    static int
669    get_value(unsigned char *str, unsigned char **endptr)
670  {  {
671  int c;  int result = 0;
672    while(*str != 0 && isspace(*str)) str++;
673    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
674    *endptr = str;
675    return(result);
676    }
677    
678    
679    
680    
681    /*************************************************
682    *            Convert UTF-8 string to value       *
683    *************************************************/
684    
685    /* This function takes one or more bytes that represent a UTF-8 character,
686    and returns the value of the character.
687    
688    Argument:
689      utf8bytes   a pointer to the byte vector
690      vptr        a pointer to an int to receive the value
691    
692    Returns:      >  0 => the number of bytes consumed
693                  -6 to 0 => malformed UTF-8 character at offset = (-return)
694    */
695    
696    #if !defined NOUTF8
697    
698    static int
699    utf82ord(unsigned char *utf8bytes, int *vptr)
700    {
701    int c = *utf8bytes++;
702    int d = c;
703    int i, j, s;
704    
705    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
706      {
707      if ((d & 0x80) == 0) break;
708      d <<= 1;
709      }
710    
711    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
712    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
713    
714    /* i now has a value in the range 1-5 */
715    
716    s = 6*i;
717    d = (c & utf8_table3[i]) << s;
718    
719    for (j = 0; j < i; j++)
720      {
721      c = *utf8bytes++;
722      if ((c & 0xc0) != 0x80) return -(j+1);
723      s -= 6;
724      d |= (c & 0x3f) << s;
725      }
726    
727    /* Check that encoding was the correct unique one */
728    
729    for (j = 0; j < utf8_table1_size; j++)
730      if (d <= utf8_table1[j]) break;
731    if (j != i) return -(i+1);
732    
733    /* Valid value */
734    
735    *vptr = d;
736    return i+1;
737    }
738    
739    #endif
740    
741    
742    
743    /*************************************************
744    *       Convert character value to UTF-8         *
745    *************************************************/
746    
747    /* This function takes an integer value in the range 0 - 0x7fffffff
748    and encodes it as a UTF-8 character in 1 to 6 bytes.
749    
750    Arguments:
751      cvalue     the character value
752      utf8bytes  pointer to buffer for result - at least 6 bytes long
753    
754    Returns:     number of bytes placed in the buffer
755    */
756    
757    #if !defined NOUTF8
758    
759    static int
760    ord2utf8(int cvalue, uschar *utf8bytes)
761    {
762    register int i, j;
763    for (i = 0; i < utf8_table1_size; i++)
764      if (cvalue <= utf8_table1[i]) break;
765    utf8bytes += i;
766    for (j = i; j > 0; j--)
767     {
768     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
769     cvalue >>= 6;
770     }
771    *utf8bytes = utf8_table2[i] | cvalue;
772    return i + 1;
773    }
774    
775    #endif
776    
777    
778    
779    /*************************************************
780    *             Print character string             *
781    *************************************************/
782    
783    /* Character string printing function. Must handle UTF-8 strings in utf8
784    mode. Yields number of characters printed. If handed a NULL file, just counts
785    chars without printing. */
786    
787    static int pchars(unsigned char *p, int length, FILE *f)
788    {
789    int c = 0;
790    int yield = 0;
791    
792  while (length-- > 0)  while (length-- > 0)
793    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
794      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
795      if (use_utf8)
796        {
797        int rc = utf82ord(p, &c);
798    
799        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
800          {
801          length -= rc - 1;
802          p += rc;
803          if (PRINTHEX(c))
804            {
805            if (f != NULL) fprintf(f, "%c", c);
806            yield++;
807            }
808          else
809            {
810            int n = 4;
811            if (f != NULL) fprintf(f, "\\x{%02x}", c);
812            yield += (n <= 0x000000ff)? 2 :
813                     (n <= 0x00000fff)? 3 :
814                     (n <= 0x0000ffff)? 4 :
815                     (n <= 0x000fffff)? 5 : 6;
816            }
817          continue;
818          }
819        }
820    #endif
821    
822       /* Not UTF-8, or malformed UTF-8  */
823    
824      c = *p++;
825      if (PRINTHEX(c))
826        {
827        if (f != NULL) fprintf(f, "%c", c);
828        yield++;
829        }
830      else
831        {
832        if (f != NULL) fprintf(f, "\\x%02x", c);
833        yield += 4;
834        }
835      }
836    
837    return yield;
838  }  }
839    
840    
841    
842    /*************************************************
843    *              Callout function                  *
844    *************************************************/
845    
846    /* Called from PCRE as a result of the (?C) item. We print out where we are in
847    the match. Yield zero unless more callouts than the fail count, or the callout
848    data is not zero. */
849    
850    static int callout(pcre_callout_block *cb)
851    {
852    FILE *f = (first_callout | callout_extra)? outfile : NULL;
853    int i, pre_start, post_start, subject_length;
854    
855    if (callout_extra)
856      {
857      fprintf(f, "Callout %d: last capture = %d\n",
858        cb->callout_number, cb->capture_last);
859    
860      for (i = 0; i < cb->capture_top * 2; i += 2)
861        {
862        if (cb->offset_vector[i] < 0)
863          fprintf(f, "%2d: <unset>\n", i/2);
864        else
865          {
866          fprintf(f, "%2d: ", i/2);
867          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
868            cb->offset_vector[i+1] - cb->offset_vector[i], f);
869          fprintf(f, "\n");
870          }
871        }
872      }
873    
874    /* Re-print the subject in canonical form, the first time or if giving full
875    details. On subsequent calls in the same match, we use pchars just to find the
876    printed lengths of the substrings. */
877    
878    if (f != NULL) fprintf(f, "--->");
879    
880    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
881    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
882      cb->current_position - cb->start_match, f);
883    
884    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
885    
886    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
887      cb->subject_length - cb->current_position, f);
888    
889    if (f != NULL) fprintf(f, "\n");
890    
891    /* Always print appropriate indicators, with callout number if not already
892    shown. For automatic callouts, show the pattern offset. */
893    
894    if (cb->callout_number == 255)
895      {
896      fprintf(outfile, "%+3d ", cb->pattern_position);
897      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
898      }
899    else
900      {
901      if (callout_extra) fprintf(outfile, "    ");
902        else fprintf(outfile, "%3d ", cb->callout_number);
903      }
904    
905    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
906    fprintf(outfile, "^");
907    
908    if (post_start > 0)
909      {
910      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
911      fprintf(outfile, "^");
912      }
913    
914    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
915      fprintf(outfile, " ");
916    
917    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
918      pbuffer + cb->pattern_position);
919    
920    fprintf(outfile, "\n");
921    first_callout = 0;
922    
923    if (cb->callout_data != NULL)
924      {
925      int callout_data = *((int *)(cb->callout_data));
926      if (callout_data != 0)
927        {
928        fprintf(outfile, "Callout data = %d\n", callout_data);
929        return callout_data;
930        }
931      }
932    
933    return (cb->callout_number != callout_fail_id)? 0 :
934           (++callout_count >= callout_fail_count)? 1 : 0;
935    }
936    
937    
938    /*************************************************
939    *            Local malloc functions              *
940    *************************************************/
941    
942  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
943  compiled re. */  compiled re. */
944    
945  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
946  {  {
947  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
948  return malloc(size);  gotten_store = size;
949    if (show_malloc)
950      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
951    return block;
952    }
953    
954    static void new_free(void *block)
955    {
956    if (show_malloc)
957      fprintf(outfile, "free             %p\n", block);
958    free(block);
959    }
960    
961    
962    /* For recursion malloc/free, to test stacking calls */
963    
964    static void *stack_malloc(size_t size)
965    {
966    void *block = malloc(size);
967    if (show_malloc)
968      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
969    return block;
970    }
971    
972    static void stack_free(void *block)
973    {
974    if (show_malloc)
975      fprintf(outfile, "stack_free       %p\n", block);
976    free(block);
977    }
978    
979    
980    /*************************************************
981    *          Call pcre_fullinfo()                  *
982    *************************************************/
983    
984    /* Get one piece of information from the pcre_fullinfo() function */
985    
986    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
987    {
988    int rc;
989    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
990      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
991    }
992    
993    
994    
995    /*************************************************
996    *         Byte flipping function                 *
997    *************************************************/
998    
999    static unsigned long int
1000    byteflip(unsigned long int value, int n)
1001    {
1002    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
1003    return ((value & 0x000000ff) << 24) |
1004           ((value & 0x0000ff00) <<  8) |
1005           ((value & 0x00ff0000) >>  8) |
1006           ((value & 0xff000000) >> 24);
1007    }
1008    
1009    
1010    
1011    
1012    /*************************************************
1013    *        Check match or recursion limit          *
1014    *************************************************/
1015    
1016    static int
1017    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
1018      int start_offset, int options, int *use_offsets, int use_size_offsets,
1019      int flag, unsigned long int *limit, int errnumber, const char *msg)
1020    {
1021    int count;
1022    int min = 0;
1023    int mid = 64;
1024    int max = -1;
1025    
1026    extra->flags |= flag;
1027    
1028    for (;;)
1029      {
1030      *limit = mid;
1031    
1032      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
1033        use_offsets, use_size_offsets);
1034    
1035      if (count == errnumber)
1036        {
1037        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1038        min = mid;
1039        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1040        }
1041    
1042      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1043                             count == PCRE_ERROR_PARTIAL)
1044        {
1045        if (mid == min + 1)
1046          {
1047          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
1048          break;
1049          }
1050        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
1051        max = mid;
1052        mid = (min + mid)/2;
1053        }
1054      else break;    /* Some other error */
1055      }
1056    
1057    extra->flags &= ~flag;
1058    return count;
1059    }
1060    
1061    
1062    
1063    /*************************************************
1064    *         Case-independent strncmp() function    *
1065    *************************************************/
1066    
1067    /*
1068    Arguments:
1069      s         first string
1070      t         second string
1071      n         number of characters to compare
1072    
1073    Returns:    < 0, = 0, or > 0, according to the comparison
1074    */
1075    
1076    static int
1077    strncmpic(uschar *s, uschar *t, int n)
1078    {
1079    while (n--)
1080      {
1081      int c = tolower(*s++) - tolower(*t++);
1082      if (c) return c;
1083      }
1084    return 0;
1085    }
1086    
1087    
1088    
1089    /*************************************************
1090    *         Check newline indicator                *
1091    *************************************************/
1092    
1093    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1094    a message and return 0 if there is no match.
1095    
1096    Arguments:
1097      p           points after the leading '<'
1098      f           file for error message
1099    
1100    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
1101    */
1102    
1103    static int
1104    check_newline(uschar *p, FILE *f)
1105    {
1106    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1107    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1108    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1109    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1110    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1111    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1112    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1113    fprintf(f, "Unknown newline type at: <%s\n", p);
1114    return 0;
1115  }  }
1116    
1117    
1118    
1119    /*************************************************
1120    *             Usage function                     *
1121    *************************************************/
1122    
1123    static void
1124    usage(void)
1125    {
1126    printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1127    printf("Input and output default to stdin and stdout.\n");
1128    #ifdef SUPPORT_LIBREADLINE
1129    printf("If input is a terminal, readline() is used to read from it.\n");
1130    #else
1131    printf("This version of pcretest is not linked with readline().\n");
1132    #endif
1133    printf("\nOptions:\n");
1134    printf("  -b       show compiled code (bytecode)\n");
1135    printf("  -C       show PCRE compile-time options and exit\n");
1136    printf("  -d       debug: show compiled code and information (-b and -i)\n");
1137    #if !defined NODFA
1138    printf("  -dfa     force DFA matching for all subjects\n");
1139    #endif
1140    printf("  -help    show usage information\n");
1141    printf("  -i       show information about compiled patterns\n"
1142           "  -M       find MATCH_LIMIT minimum for each subject\n"
1143           "  -m       output memory used information\n"
1144           "  -o <n>   set size of offsets vector to <n>\n");
1145    #if !defined NOPOSIX
1146    printf("  -p       use POSIX interface\n");
1147    #endif
1148    printf("  -q       quiet: do not output PCRE version number at start\n");
1149    printf("  -S <n>   set stack size to <n> megabytes\n");
1150    printf("  -s       output store (memory) used information\n"
1151           "  -t       time compilation and execution\n");
1152    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1153    printf("  -tm      time execution (matching) only\n");
1154    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
1155    }
1156    
1157    
1158    
1159    /*************************************************
1160    *                Main Program                    *
1161    *************************************************/
1162    
1163  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
1164  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
1165  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 289  int main(int argc, char **argv) Line 1169  int main(int argc, char **argv)
1169  FILE *infile = stdin;  FILE *infile = stdin;
1170  int options = 0;  int options = 0;
1171  int study_options = 0;  int study_options = 0;
1172    int default_find_match_limit = FALSE;
1173  int op = 1;  int op = 1;
1174  int timeit = 0;  int timeit = 0;
1175    int timeitm = 0;
1176  int showinfo = 0;  int showinfo = 0;
1177    int showstore = 0;
1178    int quiet = 0;
1179    int size_offsets = 45;
1180    int size_offsets_max;
1181    int *offsets = NULL;
1182    #if !defined NOPOSIX
1183  int posix = 0;  int posix = 0;
1184    #endif
1185  int debug = 0;  int debug = 0;
1186  int done = 0;  int done = 0;
1187  unsigned char buffer[30000];  int all_use_dfa = 0;
1188  unsigned char dbuffer[1024];  int yield = 0;
1189    int stack_size;
1190    
1191    /* These vectors store, end-to-end, a list of captured substring names. Assume
1192    that 1024 is plenty long enough for the few names we'll be testing. */
1193    
1194    uschar copynames[1024];
1195    uschar getnames[1024];
1196    
1197  /* Static so that new_malloc can use it. */  uschar *copynamesptr;
1198    uschar *getnamesptr;
1199    
1200    /* Get buffers from malloc() so that Electric Fence will check their misuse
1201    when I am debugging. They grow automatically when very long lines are read. */
1202    
1203    buffer = (unsigned char *)malloc(buffer_size);
1204    dbuffer = (unsigned char *)malloc(buffer_size);
1205    pbuffer = (unsigned char *)malloc(buffer_size);
1206    
1207    /* The outfile variable is static so that new_malloc can use it. */
1208    
1209  outfile = stdout;  outfile = stdout;
1210    
1211    /* The following  _setmode() stuff is some Windows magic that tells its runtime
1212    library to translate CRLF into a single LF character. At least, that's what
1213    I've been told: never having used Windows I take this all on trust. Originally
1214    it set 0x8000, but then I was advised that _O_BINARY was better. */
1215    
1216    #if defined(_WIN32) || defined(WIN32)
1217    _setmode( _fileno( stdout ), _O_BINARY );
1218    #endif
1219    
1220  /* Scan options */  /* Scan options */
1221    
1222  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1223    {    {
1224    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
1225    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
1226      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1227        showstore = 1;
1228      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1229      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1230    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1231    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1232      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1233    #if !defined NODFA
1234      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1235    #endif
1236      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1237          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
1238            *endptr == 0))
1239        {
1240        op++;
1241        argc--;
1242        }
1243      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1244        {
1245        int both = argv[op][2] == 0;
1246        int temp;
1247        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1248                         *endptr == 0))
1249          {
1250          timeitm = temp;
1251          op++;
1252          argc--;
1253          }
1254        else timeitm = LOOPREPEAT;
1255        if (both) timeit = timeitm;
1256        }
1257      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1258          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1259            *endptr == 0))
1260        {
1261    #if defined(_WIN32) || defined(WIN32)
1262        printf("PCRE: -S not supported on this OS\n");
1263        exit(1);
1264    #else
1265        int rc;
1266        struct rlimit rlim;
1267        getrlimit(RLIMIT_STACK, &rlim);
1268        rlim.rlim_cur = stack_size * 1024 * 1024;
1269        rc = setrlimit(RLIMIT_STACK, &rlim);
1270        if (rc != 0)
1271          {
1272        printf("PCRE: setrlimit() failed with error %d\n", rc);
1273        exit(1);
1274          }
1275        op++;
1276        argc--;
1277    #endif
1278        }
1279    #if !defined NOPOSIX
1280    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1281    #endif
1282      else if (strcmp(argv[op], "-C") == 0)
1283        {
1284        int rc;
1285        unsigned long int lrc;
1286        printf("PCRE version %s\n", pcre_version());
1287        printf("Compiled with\n");
1288        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1289        printf("  %sUTF-8 support\n", rc? "" : "No ");
1290        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1291        printf("  %sUnicode properties support\n", rc? "" : "No ");
1292        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1293        /* Note that these values are always the ASCII values, even
1294        in EBCDIC environments. CR is 13 and NL is 10. */
1295        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1296          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1297          (rc == -2)? "ANYCRLF" :
1298          (rc == -1)? "ANY" : "???");
1299        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1300        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1301                                         "all Unicode newlines");
1302        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1303        printf("  Internal link size = %d\n", rc);
1304        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1305        printf("  POSIX malloc threshold = %d\n", rc);
1306        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1307        printf("  Default match limit = %ld\n", lrc);
1308        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1309        printf("  Default recursion depth limit = %ld\n", lrc);
1310        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1311        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1312        goto EXIT;
1313        }
1314      else if (strcmp(argv[op], "-help") == 0 ||
1315               strcmp(argv[op], "--help") == 0)
1316        {
1317        usage();
1318        goto EXIT;
1319        }
1320    else    else
1321      {      {
1322      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1323      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
1324      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
1325             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
1326      }      }
1327    op++;    op++;
1328    argc--;    argc--;
1329    }    }
1330    
1331    /* Get the store for the offsets vector, and remember what it was */
1332    
1333    size_offsets_max = size_offsets;
1334    offsets = (int *)malloc(size_offsets_max * sizeof(int));
1335    if (offsets == NULL)
1336      {
1337      printf("** Failed to get %d bytes of memory for offsets vector\n",
1338        (int)(size_offsets_max * sizeof(int)));
1339      yield = 1;
1340      goto EXIT;
1341      }
1342    
1343  /* Sort out the input and output files */  /* Sort out the input and output files */
1344    
1345  if (argc > 1)  if (argc > 1)
1346    {    {
1347    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
1348    if (infile == NULL)    if (infile == NULL)
1349      {      {
1350      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
1351      return 1;      yield = 1;
1352        goto EXIT;
1353      }      }
1354    }    }
1355    
1356  if (argc > 2)  if (argc > 2)
1357    {    {
1358    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1359    if (outfile == NULL)    if (outfile == NULL)
1360      {      {
1361      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1362      return 1;      yield = 1;
1363        goto EXIT;
1364      }      }
1365    }    }
1366    
1367  /* Set alternative malloc function */  /* Set alternative malloc function */
1368    
1369  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1370    pcre_free = new_free;
1371    pcre_stack_malloc = stack_malloc;
1372    pcre_stack_free = stack_free;
1373    
1374  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1375    
1376  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1377    
1378  /* Main loop */  /* Main loop */
1379    
# Line 362  while (!done) Line 1381  while (!done)
1381    {    {
1382    pcre *re = NULL;    pcre *re = NULL;
1383    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
1384    
1385    #if !defined NOPOSIX  /* There are still compilers that require no indent */
1386    regex_t preg;    regex_t preg;
1387      int do_posix = 0;
1388    #endif
1389    
1390    const char *error;    const char *error;
1391      unsigned char *markptr;
1392    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1393    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
1394      const unsigned char *tables = NULL;
1395      unsigned long int true_size, true_study_size = 0;
1396      size_t size, regex_gotten_store;
1397      int do_mark = 0;
1398    int do_study = 0;    int do_study = 0;
1399    int do_debug = debug;    int do_debug = debug;
1400      int do_G = 0;
1401      int do_g = 0;
1402    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1403    int do_posix = 0;    int do_showrest = 0;
1404    int erroroffset, len, delimiter;    int do_flip = 0;
1405      int erroroffset, len, delimiter, poffset;
1406    
1407      use_utf8 = 0;
1408      debug_lengths = 1;
1409    
1410    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1411    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1412      fflush(outfile);
1413    
1414    p = buffer;    p = buffer;
1415    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1416    if (*p == 0) continue;    if (*p == 0) continue;
1417    
1418    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1419    complete, read more. */  
1420      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1421        {
1422        unsigned long int magic, get_options;
1423        uschar sbuf[8];
1424        FILE *f;
1425    
1426        p++;
1427        pp = p + (int)strlen((char *)p);
1428        while (isspace(pp[-1])) pp--;
1429        *pp = 0;
1430    
1431        f = fopen((char *)p, "rb");
1432        if (f == NULL)
1433          {
1434          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1435          continue;
1436          }
1437    
1438        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1439    
1440        true_size =
1441          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1442        true_study_size =
1443          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1444    
1445        re = (real_pcre *)new_malloc(true_size);
1446        regex_gotten_store = gotten_store;
1447    
1448        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1449    
1450        magic = ((real_pcre *)re)->magic_number;
1451        if (magic != MAGIC_NUMBER)
1452          {
1453          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1454            {
1455            do_flip = 1;
1456            }
1457          else
1458            {
1459            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1460            fclose(f);
1461            continue;
1462            }
1463          }
1464    
1465        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1466          do_flip? " (byte-inverted)" : "", p);
1467    
1468        /* Need to know if UTF-8 for printing data strings */
1469    
1470        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1471        use_utf8 = (get_options & PCRE_UTF8) != 0;
1472    
1473        /* Now see if there is any following study data */
1474    
1475        if (true_study_size != 0)
1476          {
1477          pcre_study_data *psd;
1478    
1479          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1480          extra->flags = PCRE_EXTRA_STUDY_DATA;
1481    
1482          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1483          extra->study_data = psd;
1484    
1485          if (fread(psd, 1, true_study_size, f) != true_study_size)
1486            {
1487            FAIL_READ:
1488            fprintf(outfile, "Failed to read data from %s\n", p);
1489            if (extra != NULL) new_free(extra);
1490            if (re != NULL) new_free(re);
1491            fclose(f);
1492            continue;
1493            }
1494          fprintf(outfile, "Study data loaded from %s\n", p);
1495          do_study = 1;     /* To get the data output if requested */
1496          }
1497        else fprintf(outfile, "No study data\n");
1498    
1499        fclose(f);
1500        goto SHOW_INFO;
1501        }
1502    
1503      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1504      the pattern; if it isn't complete, read more. */
1505    
1506    delimiter = *p++;    delimiter = *p++;
1507    
1508    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
1509      {      {
1510      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1511      goto SKIP_DATA;      goto SKIP_DATA;
1512      }      }
1513    
1514    pp = p;    pp = p;
1515      poffset = (int)(p - buffer);
1516    
1517    for(;;)    for(;;)
1518      {      {
1519      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
     if (*pp != 0) break;  
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
1520        {        {
1521        fprintf(outfile, "** Expression too long - missing delimiter?\n");        if (*pp == '\\' && pp[1] != 0) pp++;
1522        goto SKIP_DATA;          else if (*pp == delimiter) break;
1523          pp++;
1524        }        }
1525        if (*pp != 0) break;
1526      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if (fgets((char *)pp, len, infile) == NULL)  
1527        {        {
1528        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1529        done = 1;        done = 1;
# Line 415  while (!done) Line 1532  while (!done)
1532      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1533      }      }
1534    
1535    /* Terminate the pattern at the delimiter */    /* The buffer may have moved while being extended; reset the start of data
1536      pointer to the correct relative point in the buffer. */
1537    
1538      p = buffer + poffset;
1539    
1540      /* If the first character after the delimiter is backslash, make
1541      the pattern end with backslash. This is purely to provide a way
1542      of testing for the error message when a pattern ends with backslash. */
1543    
1544      if (pp[1] == '\\') *pp++ = '\\';
1545    
1546      /* Terminate the pattern at the delimiter, and save a copy of the pattern
1547      for callouts. */
1548    
1549    *pp++ = 0;    *pp++ = 0;
1550      strcpy((char *)pbuffer, (char *)p);
1551    
1552    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1553    
1554    options = 0;    options = 0;
1555    study_options = 0;    study_options = 0;
1556      log_store = showstore;  /* default from command line */
1557    
1558    while (*pp != 0)    while (*pp != 0)
1559      {      {
1560      switch (*pp++)      switch (*pp++)
1561        {        {
1562          case 'f': options |= PCRE_FIRSTLINE; break;
1563          case 'g': do_g = 1; break;
1564        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1565        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1566        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1567        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1568    
1569          case '+': do_showrest = 1; break;
1570        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1571          case 'B': do_debug = 1; break;
1572          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1573        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1574        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1575          case 'F': do_flip = 1; break;
1576          case 'G': do_G = 1; break;
1577        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1578          case 'J': options |= PCRE_DUPNAMES; break;
1579          case 'K': do_mark = 1; break;
1580          case 'M': log_store = 1; break;
1581          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1582    
1583    #if !defined NOPOSIX
1584        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1585    #endif
1586    
1587        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1588        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1589          case 'W': options |= PCRE_UCP; break;
1590        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1591          case 'Z': debug_lengths = 0; break;
1592          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1593          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1594    
1595          case 'T':
1596          switch (*pp++)
1597            {
1598            case '0': tables = tables0; break;
1599            case '1': tables = tables1; break;
1600    
1601            case '\r':
1602            case '\n':
1603            case ' ':
1604            case 0:
1605            fprintf(outfile, "** Missing table number after /T\n");
1606            goto SKIP_DATA;
1607    
1608            default:
1609            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1610            goto SKIP_DATA;
1611            }
1612          break;
1613    
1614        case 'L':        case 'L':
1615        ppp = pp;        ppp = pp;
1616        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1617          /* The 0 (NUL) test is just in case this is an unterminated line. */
1618          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1619        *ppp = 0;        *ppp = 0;
1620        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1621          {          {
1622          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1623          goto SKIP_DATA;          goto SKIP_DATA;
1624          }          }
1625          locale_set = 1;
1626        tables = pcre_maketables();        tables = pcre_maketables();
1627        pp = ppp;        pp = ppp;
1628        break;        break;
1629    
1630        case '\n': case ' ': break;        case '>':
1631          to_file = pp;
1632          while (*pp != 0) pp++;
1633          while (isspace(pp[-1])) pp--;
1634          *pp = 0;
1635          break;
1636    
1637          case '<':
1638            {
1639            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1640              {
1641              options |= PCRE_JAVASCRIPT_COMPAT;
1642              pp += 3;
1643              }
1644            else
1645              {
1646              int x = check_newline(pp, outfile);
1647              if (x == 0) goto SKIP_DATA;
1648              options |= x;
1649              while (*pp++ != '>');
1650              }
1651            }
1652          break;
1653    
1654          case '\r':                      /* So that it works in Windows */
1655          case '\n':
1656          case ' ':
1657          break;
1658    
1659        default:        default:
1660        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1661        goto SKIP_DATA;        goto SKIP_DATA;
# Line 465  while (!done) Line 1666  while (!done)
1666    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1667    local character tables. */    local character tables. */
1668    
1669    #if !defined NOPOSIX
1670    if (posix || do_posix)    if (posix || do_posix)
1671      {      {
1672      int rc;      int rc;
1673      int cflags = 0;      int cflags = 0;
1674    
1675      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1676      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1677        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1678        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1679        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1680        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1681        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1682    
1683      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1684    
1685      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 478  while (!done) Line 1687  while (!done)
1687    
1688      if (rc != 0)      if (rc != 0)
1689        {        {
1690        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1691        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1692        goto SKIP_DATA;        goto SKIP_DATA;
1693        }        }
# Line 487  while (!done) Line 1696  while (!done)
1696    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1697    
1698    else    else
1699    #endif  /* !defined NOPOSIX */
1700    
1701      {      {
1702      if (timeit)      unsigned long int get_options;
1703    
1704        if (timeit > 0)
1705        {        {
1706        register int i;        register int i;
1707        clock_t time_taken;        clock_t time_taken;
1708        clock_t start_time = clock();        clock_t start_time = clock();
1709        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1710          {          {
1711          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1712          if (re != NULL) free(re);          if (re != NULL) free(re);
1713          }          }
1714        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1715        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1716          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)timeit) /
1717              (double)CLOCKS_PER_SEC);
1718        }        }
1719    
1720      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 516  while (!done) Line 1730  while (!done)
1730          {          {
1731          for (;;)          for (;;)
1732            {            {
1733            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1734              {              {
1735              done = 1;              done = 1;
1736              goto CONTINUE;              goto CONTINUE;
# Line 530  while (!done) Line 1744  while (!done)
1744        goto CONTINUE;        goto CONTINUE;
1745        }        }
1746    
1747      /* Compilation succeeded; print data if required */      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1748        within the regex; check for this so that we know how to process the data
1749        lines. */
1750    
1751        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1752        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1753    
1754        /* Print information if required. There are now two info-returning
1755        functions. The old one has a limited interface and returns only limited
1756        data. Check that it agrees with the newer one. */
1757    
1758        if (log_store)
1759          fprintf(outfile, "Memory allocation (code space): %d\n",
1760            (int)(gotten_store -
1761                  sizeof(real_pcre) -
1762                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1763    
1764      if (do_showinfo)      /* Extract the size for possible writing before possibly flipping it,
1765        {      and remember the store that was got. */
       int first_char, count;  
1766    
1767        if (do_debug) print_internals(re, outfile);      true_size = ((real_pcre *)re)->size;
1768        regex_gotten_store = gotten_store;
       count = pcre_info(re, &options, &first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d while reading info\n", count);  
       else  
         {  
         fprintf(outfile, "Identifying subpattern count = %d\n", count);  
         if (options == 0) fprintf(outfile, "No options\n");  
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
         else  
           {  
           if (isprint(first_char))  
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
         }  
       }  
1769    
1770      /* If /S was present, study the regexp to generate additional info to      /* If /S was present, study the regexp to generate additional info to
1771      help with the matching. */      help with the matching. */
1772    
1773      if (do_study)      if (do_study)
1774        {        {
1775        if (timeit)        if (timeit > 0)
1776          {          {
1777          register int i;          register int i;
1778          clock_t time_taken;          clock_t time_taken;
1779          clock_t start_time = clock();          clock_t start_time = clock();
1780          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1781            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1782          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1783          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1784          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1785            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeit) /
1786                (double)CLOCKS_PER_SEC);
1787          }          }
   
1788        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
1789        if (error != NULL)        if (error != NULL)
1790          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
1791        else if (extra == NULL)        else if (extra != NULL)
1792          fprintf(outfile, "Study returned NULL\n");          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1793          }
1794    
1795        /* If /K was present, we set up for handling MARK data. */
1796    
1797        if (do_mark)
1798          {
1799          if (extra == NULL)
1800            {
1801            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1802            extra->flags = 0;
1803            }
1804          extra->mark = &markptr;
1805          extra->flags |= PCRE_EXTRA_MARK;
1806          }
1807    
1808        /* If the 'F' option was present, we flip the bytes of all the integer
1809        fields in the regex data block and the study block. This is to make it
1810        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1811        compiled on a different architecture. */
1812    
1813        if (do_flip)
1814          {
1815          real_pcre *rre = (real_pcre *)re;
1816          rre->magic_number =
1817            byteflip(rre->magic_number, sizeof(rre->magic_number));
1818          rre->size = byteflip(rre->size, sizeof(rre->size));
1819          rre->options = byteflip(rre->options, sizeof(rre->options));
1820          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1821          rre->top_bracket =
1822            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1823          rre->top_backref =
1824            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1825          rre->first_byte =
1826            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1827          rre->req_byte =
1828            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1829          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1830            sizeof(rre->name_table_offset));
1831          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1832            sizeof(rre->name_entry_size));
1833          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1834            sizeof(rre->name_count));
1835    
1836          if (extra != NULL)
1837            {
1838            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1839            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1840            rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1841            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1842            }
1843          }
1844    
1845        /* Extract information from the compiled data if required */
1846    
1847        SHOW_INFO:
1848    
1849        if (do_debug)
1850          {
1851          fprintf(outfile, "------------------------------------------------------------------\n");
1852          pcre_printint(re, outfile, debug_lengths);
1853          }
1854    
1855        /* We already have the options in get_options (see above) */
1856    
1857        if (do_showinfo)
1858          {
1859          unsigned long int all_options;
1860    #if !defined NOINFOCHECK
1861          int old_first_char, old_options, old_count;
1862    #endif
1863          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1864            hascrorlf;
1865          int nameentrysize, namecount;
1866          const uschar *nametable;
1867    
1868          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1869          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1870          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1871          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1872          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1873          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1874          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1875          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1876          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1877          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1878          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1879    
1880    #if !defined NOINFOCHECK
1881          old_count = pcre_info(re, &old_options, &old_first_char);
1882          if (count < 0) fprintf(outfile,
1883            "Error %d from pcre_info()\n", count);
1884          else
1885            {
1886            if (old_count != count) fprintf(outfile,
1887              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1888                old_count);
1889    
1890            if (old_first_char != first_char) fprintf(outfile,
1891              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1892                first_char, old_first_char);
1893    
1894            if (old_options != (int)get_options) fprintf(outfile,
1895              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1896                get_options, old_options);
1897            }
1898    #endif
1899    
1900          if (size != regex_gotten_store) fprintf(outfile,
1901            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1902            (int)size, (int)regex_gotten_store);
1903    
1904          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1905          if (backrefmax > 0)
1906            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1907    
1908          if (namecount > 0)
1909            {
1910            fprintf(outfile, "Named capturing subpatterns:\n");
1911            while (namecount-- > 0)
1912              {
1913              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1914                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1915                GET2(nametable, 0));
1916              nametable += nameentrysize;
1917              }
1918            }
1919    
1920          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1921          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1922    
1923          all_options = ((real_pcre *)re)->options;
1924          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1925    
1926          if (get_options == 0) fprintf(outfile, "No options\n");
1927            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1928              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1929              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1930              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1931              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1932              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1933              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1934              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1935              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1936              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1937              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1938              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1939              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1940              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1941              ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1942              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1943              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1944    
1945          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1946    
1947        /* This looks at internal information. A bit kludgy to do it this        switch (get_options & PCRE_NEWLINE_BITS)
1948        way, but it is useful for testing. */          {
1949            case PCRE_NEWLINE_CR:
1950            fprintf(outfile, "Forced newline sequence: CR\n");
1951            break;
1952    
1953            case PCRE_NEWLINE_LF:
1954            fprintf(outfile, "Forced newline sequence: LF\n");
1955            break;
1956    
1957            case PCRE_NEWLINE_CRLF:
1958            fprintf(outfile, "Forced newline sequence: CRLF\n");
1959            break;
1960    
1961            case PCRE_NEWLINE_ANYCRLF:
1962            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1963            break;
1964    
1965            case PCRE_NEWLINE_ANY:
1966            fprintf(outfile, "Forced newline sequence: ANY\n");
1967            break;
1968    
1969            default:
1970            break;
1971            }
1972    
1973          if (first_char == -1)
1974            {
1975            fprintf(outfile, "First char at start or follows newline\n");
1976            }
1977          else if (first_char < 0)
1978            {
1979            fprintf(outfile, "No first char\n");
1980            }
1981          else
1982            {
1983            int ch = first_char & 255;
1984            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1985              "" : " (caseless)";
1986            if (PRINTHEX(ch))
1987              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1988            else
1989              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1990            }
1991    
1992        else if (do_showinfo)        if (need_char < 0)
1993            {
1994            fprintf(outfile, "No need char\n");
1995            }
1996          else
1997            {
1998            int ch = need_char & 255;
1999            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2000              "" : " (caseless)";
2001            if (PRINTHEX(ch))
2002              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2003            else
2004              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
2005            }
2006    
2007          /* Don't output study size; at present it is in any case a fixed
2008          value, but it varies, depending on the computer architecture, and
2009          so messes up the test suite. (And with the /F option, it might be
2010          flipped.) */
2011    
2012          if (do_study)
2013            {
2014            if (extra == NULL)
2015              fprintf(outfile, "Study returned NULL\n");
2016            else
2017              {
2018              uschar *start_bits = NULL;
2019              int minlength;
2020    
2021              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2022              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2023    
2024              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2025              if (start_bits == NULL)
2026                fprintf(outfile, "No set of starting bytes\n");
2027              else
2028                {
2029                int i;
2030                int c = 24;
2031                fprintf(outfile, "Starting byte set: ");
2032                for (i = 0; i < 256; i++)
2033                  {
2034                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
2035                    {
2036                    if (c > 75)
2037                      {
2038                      fprintf(outfile, "\n  ");
2039                      c = 2;
2040                      }
2041                    if (PRINTHEX(i) && i != ' ')
2042                      {
2043                      fprintf(outfile, "%c ", i);
2044                      c += 2;
2045                      }
2046                    else
2047                      {
2048                      fprintf(outfile, "\\x%02x ", i);
2049                      c += 5;
2050                      }
2051                    }
2052                  }
2053                fprintf(outfile, "\n");
2054                }
2055              }
2056            }
2057          }
2058    
2059        /* If the '>' option was present, we write out the regex to a file, and
2060        that is all. The first 8 bytes of the file are the regex length and then
2061        the study length, in big-endian order. */
2062    
2063        if (to_file != NULL)
2064          {
2065          FILE *f = fopen((char *)to_file, "wb");
2066          if (f == NULL)
2067            {
2068            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
2069            }
2070          else
2071          {          {
2072          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
2073          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (uschar)((true_size >> 24) & 255);
2074            fprintf(outfile, "No starting character set\n");          sbuf[1] = (uschar)((true_size >> 16) & 255);
2075            sbuf[2] = (uschar)((true_size >>  8) & 255);
2076            sbuf[3] = (uschar)((true_size) & 255);
2077    
2078            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2079            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2080            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2081            sbuf[7] = (uschar)((true_study_size) & 255);
2082    
2083            if (fwrite(sbuf, 1, 8, f) < 8 ||
2084                fwrite(re, 1, true_size, f) < true_size)
2085              {
2086              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
2087              }
2088          else          else
2089            {            {
2090            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
2091            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
2092              {              {
2093              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
2094                    true_study_size)
2095                {                {
2096                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
2097                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
2098                }                }
2099                else fprintf(outfile, "Study data written to %s\n", to_file);
2100    
2101              }              }
           fprintf(outfile, "\n");  
2102            }            }
2103            fclose(f);
2104            }
2105    
2106          new_free(re);
2107          if (extra != NULL) new_free(extra);
2108          if (locale_set)
2109            {
2110            new_free((void *)tables);
2111            setlocale(LC_CTYPE, "C");
2112            locale_set = 0;
2113          }          }
2114          continue;  /* With next regex */
2115        }        }
2116      }      }        /* End of non-POSIX compile */
2117    
2118    /* Read data lines and test them */    /* Read data lines and test them */
2119    
2120    for (;;)    for (;;)
2121      {      {
2122      unsigned char *q;      uschar *q;
2123        uschar *bptr;
2124        int *use_offsets = offsets;
2125        int use_size_offsets = size_offsets;
2126        int callout_data = 0;
2127        int callout_data_set = 0;
2128      int count, c;      int count, c;
2129      int offsets[45];      int copystrings = 0;
2130      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = default_find_match_limit;
2131        int getstrings = 0;
2132        int getlist = 0;
2133        int gmatched = 0;
2134        int start_offset = 0;
2135        int start_offset_sign = 1;
2136        int g_notempty = 0;
2137        int use_dfa = 0;
2138    
2139      options = 0;      options = 0;
2140    
2141      if (infile == stdin) printf("  data> ");      *copynames = 0;
2142      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
2143    
2144        copynamesptr = copynames;
2145        getnamesptr = getnames;
2146    
2147        pcre_callout = callout;
2148        first_callout = 1;
2149        callout_extra = 0;
2150        callout_count = 0;
2151        callout_fail_count = 999999;
2152        callout_fail_id = -1;
2153        show_malloc = 0;
2154    
2155        if (extra != NULL) extra->flags &=
2156          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2157    
2158        len = 0;
2159        for (;;)
2160        {        {
2161        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2162        goto CONTINUE;          {
2163            if (len > 0)    /* Reached EOF without hitting a newline */
2164              {
2165              fprintf(outfile, "\n");
2166              break;
2167              }
2168            done = 1;
2169            goto CONTINUE;
2170            }
2171          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2172          len = (int)strlen((char *)buffer);
2173          if (buffer[len-1] == '\n') break;
2174        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2175    
     len = (int)strlen((char *)buffer);  
2176      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2177      buffer[len] = 0;      buffer[len] = 0;
2178      if (len == 0) break;      if (len == 0) break;
# Line 663  while (!done) Line 2180  while (!done)
2180      p = buffer;      p = buffer;
2181      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2182    
2183      q = dbuffer;      bptr = q = dbuffer;
2184      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2185        {        {
2186        int i = 0;        int i = 0;
2187        int n = 0;        int n = 0;
2188    
2189        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
2190          {          {
2191          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 684  while (!done) Line 2202  while (!done)
2202          c -= '0';          c -= '0';
2203          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2204            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2205    
2206    #if !defined NOUTF8
2207            if (use_utf8 && c > 255)
2208              {
2209              unsigned char buff8[8];
2210              int ii, utn;
2211              utn = ord2utf8(c, buff8);
2212              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2213              c = buff8[ii];   /* Last byte */
2214              }
2215    #endif
2216          break;          break;
2217    
2218          case 'x':          case 'x':
2219    
2220            /* Handle \x{..} specially - new Perl thing for utf8 */
2221    
2222    #if !defined NOUTF8
2223            if (*p == '{')
2224              {
2225              unsigned char *pt = p;
2226              c = 0;
2227              while (isxdigit(*(++pt)))
2228                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
2229              if (*pt == '}')
2230                {
2231                unsigned char buff8[8];
2232                int ii, utn;
2233                if (use_utf8)
2234                  {
2235                  utn = ord2utf8(c, buff8);
2236                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2237                  c = buff8[ii];   /* Last byte */
2238                  }
2239                else
2240                 {
2241                 if (c > 255)
2242                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2243                     "UTF-8 mode is not enabled.\n"
2244                     "** Truncation will probably give the wrong result.\n", c);
2245                 }
2246                p = pt + 1;
2247                break;
2248                }
2249              /* Not correct form; fall through */
2250              }
2251    #endif
2252    
2253            /* Ordinary \x */
2254    
2255          c = 0;          c = 0;
2256          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2257            {            {
# Line 695  while (!done) Line 2260  while (!done)
2260            }            }
2261          break;          break;
2262    
2263          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
2264          p--;          p--;
2265          continue;          continue;
2266    
2267            case '>':
2268            if (*p == '-')
2269              {
2270              start_offset_sign = -1;
2271              p++;
2272              }
2273            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2274            start_offset *= start_offset_sign;
2275            continue;
2276    
2277          case 'A':  /* Option setting */          case 'A':  /* Option setting */
2278          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
2279          continue;          continue;
# Line 707  while (!done) Line 2282  while (!done)
2282          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
2283          continue;          continue;
2284    
2285            case 'C':
2286            if (isdigit(*p))    /* Set copy string */
2287              {
2288              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2289              copystrings |= 1 << n;
2290              }
2291            else if (isalnum(*p))
2292              {
2293              uschar *npp = copynamesptr;
2294              while (isalnum(*p)) *npp++ = *p++;
2295              *npp++ = 0;
2296              *npp = 0;
2297              n = pcre_get_stringnumber(re, (char *)copynamesptr);
2298              if (n < 0)
2299                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2300              copynamesptr = npp;
2301              }
2302            else if (*p == '+')
2303              {
2304              callout_extra = 1;
2305              p++;
2306              }
2307            else if (*p == '-')
2308              {
2309              pcre_callout = NULL;
2310              p++;
2311              }
2312            else if (*p == '!')
2313              {
2314              callout_fail_id = 0;
2315              p++;
2316              while(isdigit(*p))
2317                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
2318              callout_fail_count = 0;
2319              if (*p == '!')
2320                {
2321                p++;
2322                while(isdigit(*p))
2323                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
2324                }
2325              }
2326            else if (*p == '*')
2327              {
2328              int sign = 1;
2329              callout_data = 0;
2330              if (*(++p) == '-') { sign = -1; p++; }
2331              while(isdigit(*p))
2332                callout_data = callout_data * 10 + *p++ - '0';
2333              callout_data *= sign;
2334              callout_data_set = 1;
2335              }
2336            continue;
2337    
2338    #if !defined NODFA
2339            case 'D':
2340    #if !defined NOPOSIX
2341            if (posix || do_posix)
2342              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
2343            else
2344    #endif
2345              use_dfa = 1;
2346            continue;
2347    #endif
2348    
2349    #if !defined NODFA
2350            case 'F':
2351            options |= PCRE_DFA_SHORTEST;
2352            continue;
2353    #endif
2354    
2355            case 'G':
2356            if (isdigit(*p))
2357              {
2358              while(isdigit(*p)) n = n * 10 + *p++ - '0';
2359              getstrings |= 1 << n;
2360              }
2361            else if (isalnum(*p))
2362              {
2363              uschar *npp = getnamesptr;
2364              while (isalnum(*p)) *npp++ = *p++;
2365              *npp++ = 0;
2366              *npp = 0;
2367              n = pcre_get_stringnumber(re, (char *)getnamesptr);
2368              if (n < 0)
2369                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2370              getnamesptr = npp;
2371              }
2372            continue;
2373    
2374            case 'L':
2375            getlist = 1;
2376            continue;
2377    
2378            case 'M':
2379            find_match_limit = 1;
2380            continue;
2381    
2382            case 'N':
2383            if ((options & PCRE_NOTEMPTY) != 0)
2384              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2385            else
2386              options |= PCRE_NOTEMPTY;
2387            continue;
2388    
2389          case 'O':          case 'O':
2390          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
2391          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
2392              {
2393              size_offsets_max = n;
2394              free(offsets);
2395              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2396              if (offsets == NULL)
2397                {
2398                printf("** Failed to get %d bytes of memory for offsets vector\n",
2399                  (int)(size_offsets_max * sizeof(int)));
2400                yield = 1;
2401                goto EXIT;
2402                }
2403              }
2404            use_size_offsets = n;
2405            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
2406            continue;
2407    
2408            case 'P':
2409            options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2410              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2411            continue;
2412    
2413            case 'Q':
2414            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2415            if (extra == NULL)
2416              {
2417              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2418              extra->flags = 0;
2419              }
2420            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2421            extra->match_limit_recursion = n;
2422            continue;
2423    
2424            case 'q':
2425            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2426            if (extra == NULL)
2427              {
2428              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2429              extra->flags = 0;
2430              }
2431            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2432            extra->match_limit = n;
2433            continue;
2434    
2435    #if !defined NODFA
2436            case 'R':
2437            options |= PCRE_DFA_RESTART;
2438            continue;
2439    #endif
2440    
2441            case 'S':
2442            show_malloc = 1;
2443            continue;
2444    
2445            case 'Y':
2446            options |= PCRE_NO_START_OPTIMIZE;
2447          continue;          continue;
2448    
2449          case 'Z':          case 'Z':
2450          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2451          continue;          continue;
2452    
2453            case '?':
2454            options |= PCRE_NO_UTF8_CHECK;
2455            continue;
2456    
2457            case '<':
2458              {
2459              int x = check_newline(p, outfile);
2460              if (x == 0) goto NEXT_DATA;
2461              options |= x;
2462              while (*p++ != '>');
2463              }
2464            continue;
2465          }          }
2466        *q++ = c;        *q++ = c;
2467        }        }
2468      *q = 0;      *q = 0;
2469      len = q - dbuffer;      len = (int)(q - dbuffer);
2470    
2471        /* Move the data to the end of the buffer so that a read over the end of
2472        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2473        we are using the POSIX interface, we must include the terminating zero. */
2474    
2475    #if !defined NOPOSIX
2476        if (posix || do_posix)
2477          {
2478          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2479          bptr += buffer_size - len - 1;
2480          }
2481        else
2482    #endif
2483          {
2484          memmove(bptr + buffer_size - len, bptr, len);
2485          bptr += buffer_size - len;
2486          }
2487    
2488        if ((all_use_dfa || use_dfa) && find_match_limit)
2489          {
2490          printf("**Match limit not relevant for DFA matching: ignored\n");
2491          find_match_limit = 0;
2492          }
2493    
2494      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2495      support timing. */      support timing or playing with the match limit or callout data. */
2496    
2497    #if !defined NOPOSIX
2498      if (posix || do_posix)      if (posix || do_posix)
2499        {        {
2500        int rc;        int rc;
2501        int eflags = 0;        int eflags = 0;
2502        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
2503          if (use_size_offsets > 0)
2504            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2505        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2506        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2507          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2508    
2509        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
2510    
2511        if (rc != 0)        if (rc != 0)
2512          {          {
2513          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2514          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2515          }          }
2516          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2517                  != 0)
2518            {
2519            fprintf(outfile, "Matched with REG_NOSUB\n");
2520            }
2521        else        else
2522          {          {
2523          size_t i;          size_t i;
2524          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2525            {            {
2526            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2527              {              {
2528              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
2529              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2530                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2531              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2532                if (i == 0 && do_showrest)
2533                  {
2534                  fprintf(outfile, " 0+ ");
2535                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2536                    outfile);
2537                  fprintf(outfile, "\n");
2538                  }
2539              }              }
2540            }            }
2541          }          }
2542          free(pmatch);
2543        }        }
2544    
2545      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
2546    
2547      else      else
2548    #endif  /* !defined NOPOSIX */
2549    
2550        for (;; gmatched++)    /* Loop for /g or /G */
2551        {        {
2552        if (timeit)        markptr = NULL;
2553    
2554          if (timeitm > 0)
2555          {          {
2556          register int i;          register int i;
2557          clock_t time_taken;          clock_t time_taken;
2558          clock_t start_time = clock();          clock_t start_time = clock();
2559          for (i = 0; i < 4000; i++)  
2560            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
2561              size_offsets);          if (all_use_dfa || use_dfa)
2562              {
2563              int workspace[1000];
2564              for (i = 0; i < timeitm; i++)
2565                count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2566                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2567                  sizeof(workspace)/sizeof(int));
2568              }
2569            else
2570    #endif
2571    
2572            for (i = 0; i < timeitm; i++)
2573              count = pcre_exec(re, extra, (char *)bptr, len,
2574                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2575    
2576          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2577          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2578            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeitm) /
2579                (double)CLOCKS_PER_SEC);
2580            }
2581    
2582          /* If find_match_limit is set, we want to do repeated matches with
2583          varying limits in order to find the minimum value for the match limit and
2584          for the recursion limit. */
2585    
2586          if (find_match_limit)
2587            {
2588            if (extra == NULL)
2589              {
2590              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2591              extra->flags = 0;
2592              }
2593    
2594            (void)check_match_limit(re, extra, bptr, len, start_offset,
2595              options|g_notempty, use_offsets, use_size_offsets,
2596              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2597              PCRE_ERROR_MATCHLIMIT, "match()");
2598    
2599            count = check_match_limit(re, extra, bptr, len, start_offset,
2600              options|g_notempty, use_offsets, use_size_offsets,
2601              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2602              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2603            }
2604    
2605          /* If callout_data is set, use the interface with additional data */
2606    
2607          else if (callout_data_set)
2608            {
2609            if (extra == NULL)
2610              {
2611              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2612              extra->flags = 0;
2613              }
2614            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2615            extra->callout_data = &callout_data;
2616            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2617              options | g_notempty, use_offsets, use_size_offsets);
2618            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2619          }          }
2620    
2621        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
2622          size_offsets);        value of match_limit. */
2623    
2624    #if !defined NODFA
2625          else if (all_use_dfa || use_dfa)
2626            {
2627            int workspace[1000];
2628            count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2629              options | g_notempty, use_offsets, use_size_offsets, workspace,
2630              sizeof(workspace)/sizeof(int));
2631            if (count == 0)
2632              {
2633              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2634              count = use_size_offsets/2;
2635              }
2636            }
2637    #endif
2638    
2639        if (count == 0)        else
2640          {          {
2641          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2642          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2643            if (count == 0)
2644              {
2645              fprintf(outfile, "Matched, but too many substrings\n");
2646              count = use_size_offsets/3;
2647              }
2648          }          }
2649    
2650          /* Matched */
2651    
2652        if (count >= 0)        if (count >= 0)
2653          {          {
2654          int i;          int i, maxcount;
2655          count *= 2;  
2656          for (i = 0; i < count; i += 2)  #if !defined NODFA
2657            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2658    #endif
2659              maxcount = use_size_offsets/3;
2660    
2661            /* This is a check against a lunatic return value. */
2662    
2663            if (count > maxcount)
2664              {
2665              fprintf(outfile,
2666                "** PCRE error: returned count %d is too big for offset size %d\n",
2667                count, use_size_offsets);
2668              count = use_size_offsets/3;
2669              if (do_g || do_G)
2670                {
2671                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2672                do_g = do_G = FALSE;        /* Break g/G loop */
2673                }
2674              }
2675    
2676            for (i = 0; i < count * 2; i += 2)
2677            {            {
2678            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2679              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2680            else            else
2681              {              {
2682              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2683              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2684                  use_offsets[i+1] - use_offsets[i], outfile);
2685              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2686                if (i == 0)
2687                  {
2688                  if (do_showrest)
2689                    {
2690                    fprintf(outfile, " 0+ ");
2691                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2692                      outfile);
2693                    fprintf(outfile, "\n");
2694                    }
2695                  }
2696                }
2697              }
2698    
2699            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2700    
2701            for (i = 0; i < 32; i++)
2702              {
2703              if ((copystrings & (1 << i)) != 0)
2704                {
2705                char copybuffer[256];
2706                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2707                  i, copybuffer, sizeof(copybuffer));
2708                if (rc < 0)
2709                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2710                else
2711                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2712                }
2713              }
2714    
2715            for (copynamesptr = copynames;
2716                 *copynamesptr != 0;
2717                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2718              {
2719              char copybuffer[256];
2720              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2721                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2722              if (rc < 0)
2723                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2724              else
2725                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2726              }
2727    
2728            for (i = 0; i < 32; i++)
2729              {
2730              if ((getstrings & (1 << i)) != 0)
2731                {
2732                const char *substring;
2733                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2734                  i, &substring);
2735                if (rc < 0)
2736                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2737                else
2738                  {
2739                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2740                  pcre_free_substring(substring);
2741                  }
2742                }
2743              }
2744    
2745            for (getnamesptr = getnames;
2746                 *getnamesptr != 0;
2747                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2748              {
2749              const char *substring;
2750              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2751                count, (char *)getnamesptr, &substring);
2752              if (rc < 0)
2753                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2754              else
2755                {
2756                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2757                pcre_free_substring(substring);
2758                }
2759              }
2760    
2761            if (getlist)
2762              {
2763              const char **stringlist;
2764              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2765                &stringlist);
2766              if (rc < 0)
2767                fprintf(outfile, "get substring list failed %d\n", rc);
2768              else
2769                {
2770                for (i = 0; i < count; i++)
2771                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2772                if (stringlist[i] != NULL)
2773                  fprintf(outfile, "string list not terminated by NULL\n");
2774                /* free((void *)stringlist); */
2775                pcre_free_substring_list(stringlist);
2776              }              }
2777            }            }
2778          }          }
2779    
2780          /* There was a partial match */
2781    
2782          else if (count == PCRE_ERROR_PARTIAL)
2783            {
2784            if (markptr == NULL) fprintf(outfile, "Partial match");
2785              else fprintf(outfile, "Partial match, mark=%s", markptr);
2786            if (use_size_offsets > 1)
2787              {
2788              fprintf(outfile, ": ");
2789              pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2790                outfile);
2791              }
2792            fprintf(outfile, "\n");
2793            break;  /* Out of the /g loop */
2794            }
2795    
2796          /* Failed to match. If this is a /g or /G loop and we previously set
2797          g_notempty after a null match, this is not necessarily the end. We want
2798          to advance the start offset, and continue. We won't be at the end of the
2799          string - that was checked before setting g_notempty.
2800    
2801          Complication arises in the case when the newline convention is "any",
2802          "crlf", or "anycrlf". If the previous match was at the end of a line
2803          terminated by CRLF, an advance of one character just passes the \r,
2804          whereas we should prefer the longer newline sequence, as does the code in
2805          pcre_exec(). Fudge the offset value to achieve this. We check for a
2806          newline setting in the pattern; if none was set, use pcre_config() to
2807          find the default.
2808    
2809          Otherwise, in the case of UTF-8 matching, the advance must be one
2810          character, not one byte. */
2811    
2812        else        else
2813          {          {
2814          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2815              {
2816              int onechar = 1;
2817              unsigned int obits = ((real_pcre *)re)->options;
2818              use_offsets[0] = start_offset;
2819              if ((obits & PCRE_NEWLINE_BITS) == 0)
2820                {
2821                int d;
2822                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2823                /* Note that these values are always the ASCII ones, even in
2824                EBCDIC environments. CR = 13, NL = 10. */
2825                obits = (d == 13)? PCRE_NEWLINE_CR :
2826                        (d == 10)? PCRE_NEWLINE_LF :
2827                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2828                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2829                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2830                }
2831              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2832                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2833                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2834                  &&
2835                  start_offset < len - 1 &&
2836                  bptr[start_offset] == '\r' &&
2837                  bptr[start_offset+1] == '\n')
2838                onechar++;
2839              else if (use_utf8)
2840                {
2841                while (start_offset + onechar < len)
2842                  {
2843                  if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2844                  onechar++;
2845                  }
2846                }
2847              use_offsets[1] = start_offset + onechar;
2848              }
2849            else
2850              {
2851              if (count == PCRE_ERROR_NOMATCH)
2852                {
2853                if (gmatched == 0)
2854                  {
2855                  if (markptr == NULL) fprintf(outfile, "No match\n");
2856                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2857                  }
2858                }
2859            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2860              break;  /* Out of the /g loop */
2861              }
2862          }          }
2863        }  
2864      }        /* If not /g or /G we are done */
2865    
2866          if (!do_g && !do_G) break;
2867    
2868          /* If we have matched an empty string, first check to see if we are at
2869          the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2870          Perl's /g option does. This turns out to be rather cunning. First we set
2871          PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2872          same point. If this fails (picked up above) we advance to the next
2873          character. */
2874    
2875          g_notempty = 0;
2876    
2877          if (use_offsets[0] == use_offsets[1])
2878            {
2879            if (use_offsets[0] == len) break;
2880            g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2881            }
2882    
2883          /* For /g, update the start offset, leaving the rest alone */
2884    
2885          if (do_g) start_offset = use_offsets[1];
2886    
2887          /* For /G, update the pointer and length */
2888    
2889          else
2890            {
2891            bptr += use_offsets[1];
2892            len -= use_offsets[1];
2893            }
2894          }  /* End of loop for /g and /G */
2895    
2896        NEXT_DATA: continue;
2897        }    /* End of loop for data lines */
2898    
2899    CONTINUE:    CONTINUE:
2900    
2901    #if !defined NOPOSIX
2902    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2903    if (re != NULL) free(re);  #endif
2904    if (extra != NULL) free(extra);  
2905    if (tables != NULL)    if (re != NULL) new_free(re);
2906      if (extra != NULL) new_free(extra);
2907      if (locale_set)
2908      {      {
2909      free((void *)tables);      new_free((void *)tables);
2910      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2911        locale_set = 0;
2912      }      }
2913    }    }
2914    
2915  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2916  return 0;  
2917    EXIT:
2918    
2919    if (infile != NULL && infile != stdin) fclose(infile);
2920    if (outfile != NULL && outfile != stdout) fclose(outfile);
2921    
2922    free(buffer);
2923    free(dbuffer);
2924    free(pbuffer);
2925    free(offsets);
2926    
2927    return yield;
2928  }  }
2929    
2930  /* End */  /* End of pcretest.c */

Legend:
Removed from v.25  
changed lines
  Added in v.567

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12