/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 567 by ph10, Sat Nov 6 17:10:00 2010 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
51  #define PCRE_SPY        /* For Win32 build, import data, not export */  #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #ifndef isatty
75    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
76    #endif                         /* though in some environments they seem to   */
77                                   /* be already defined, hence the #ifndefs.    */
78    #ifndef fileno
79    #define fileno _fileno
80    #endif
81    
82    #else
83    #include <sys/time.h>          /* These two includes are needed */
84    #include <sys/resource.h>      /* for setrlimit(). */
85    #define INPUT_MODE   "rb"
86    #define OUTPUT_MODE  "wb"
87    #endif
88    
89    
90    /* We have to include pcre_internal.h because we need the internal info for
91    displaying the results of pcre_study() and we also need to know about the
92    internal macros, structures, and other internal data values; pcretest has
93    "inside information" compared to a program that strictly follows the PCRE API.
94    
95  /* We include pcre_internal.h because we need the internal info for displaying  Although pcre_internal.h does itself include pcre.h, we explicitly include it
96  the results of pcre_study() and we also need to know about the internal  here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97  macros, structures, and other internal data values; pcretest has "inside  appropriately for an application, not for building PCRE. */
 information" compared to a program that strictly follows the PCRE API. */  
98    
99    #include "pcre.h"
100  #include "pcre_internal.h"  #include "pcre_internal.h"
101    
102  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to some of the data tables that PCRE uses. So as not to have
103  two copies, we include the source file here, changing the names of the external  to keep two copies, we include the source file here, changing the names of the
104  symbols to prevent clashes. */  external symbols to prevent clashes. */
105    
106    #define _pcre_ucp_gentype      ucp_gentype
107  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
108  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
109  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 64  symbols to prevent clashes. */ Line 111  symbols to prevent clashes. */
111  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
112  #define _pcre_utt              utt  #define _pcre_utt              utt
113  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
114    #define _pcre_utt_names        utt_names
115  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
116    
117  #include "pcre_tables.c"  #include "pcre_tables.c"
118    
119  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
120  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
121  pcre_compile.c when that module is compiled with debugging enabled. */  pcre_compile.c when that module is compiled with debugging enabled. It needs to
122    know which case is being compiled. */
123    
124    #define COMPILING_PCRETEST
125  #include "pcre_printint.src"  #include "pcre_printint.src"
126    
127    /* The definition of the macro PRINTABLE, which determines whether to print an
128    output character as-is or as a hex value when showing compiled patterns, is
129    contained in the printint.src file. We use it here also, in cases when the
130    locale has not been explicitly changed, so as to get consistent output from
131    systems that differ in their output from isprint() even in the "C" locale. */
132    
133    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134    
135  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
136  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 83  Makefile. */ Line 140  Makefile. */
140  #include "pcreposix.h"  #include "pcreposix.h"
141  #endif  #endif
142    
143  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
144  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
146  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147    UTF8 support if PCRE is built without it. */
148    
149    #ifndef SUPPORT_UTF8
150    #ifndef NOUTF8
151    #define NOUTF8
152    #endif
153    #endif
154    
155    
156  /* Other parameters */  /* Other parameters */
# Line 99  function (define NOINFOCHECK). */ Line 163  function (define NOINFOCHECK). */
163  #endif  #endif
164  #endif  #endif
165    
166  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
   
 #define BUFFER_SIZE 30000  
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
167    
168    #define LOOPREPEAT 500000
169    
170  /* Static variables */  /* Static variables */
171    
# Line 114  static int callout_count; Line 175  static int callout_count;
175  static int callout_extra;  static int callout_extra;
176  static int callout_fail_count;  static int callout_fail_count;
177  static int callout_fail_id;  static int callout_fail_id;
178    static int debug_lengths;
179  static int first_callout;  static int first_callout;
180    static int locale_set = 0;
181  static int show_malloc;  static int show_malloc;
182  static int use_utf8;  static int use_utf8;
183  static size_t gotten_store;  static size_t gotten_store;
184    
185    /* The buffers grow automatically if very long input lines are encountered. */
186    
187    static int buffer_size = 50000;
188    static uschar *buffer = NULL;
189    static uschar *dbuffer = NULL;
190  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
191    
192    
193    /*************************************************
194    *         Alternate character tables             *
195    *************************************************/
196    
197    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
198    using the default tables of the library. However, the T option can be used to
199    select alternate sets of tables, for different kinds of testing. Note also that
200    the L (locale) option also adjusts the tables. */
201    
202    /* This is the set of tables distributed as default with PCRE. It recognizes
203    only ASCII characters. */
204    
205    static const unsigned char tables0[] = {
206    
207    /* This table is a lower casing table. */
208    
209        0,  1,  2,  3,  4,  5,  6,  7,
210        8,  9, 10, 11, 12, 13, 14, 15,
211       16, 17, 18, 19, 20, 21, 22, 23,
212       24, 25, 26, 27, 28, 29, 30, 31,
213       32, 33, 34, 35, 36, 37, 38, 39,
214       40, 41, 42, 43, 44, 45, 46, 47,
215       48, 49, 50, 51, 52, 53, 54, 55,
216       56, 57, 58, 59, 60, 61, 62, 63,
217       64, 97, 98, 99,100,101,102,103,
218      104,105,106,107,108,109,110,111,
219      112,113,114,115,116,117,118,119,
220      120,121,122, 91, 92, 93, 94, 95,
221       96, 97, 98, 99,100,101,102,103,
222      104,105,106,107,108,109,110,111,
223      112,113,114,115,116,117,118,119,
224      120,121,122,123,124,125,126,127,
225      128,129,130,131,132,133,134,135,
226      136,137,138,139,140,141,142,143,
227      144,145,146,147,148,149,150,151,
228      152,153,154,155,156,157,158,159,
229      160,161,162,163,164,165,166,167,
230      168,169,170,171,172,173,174,175,
231      176,177,178,179,180,181,182,183,
232      184,185,186,187,188,189,190,191,
233      192,193,194,195,196,197,198,199,
234      200,201,202,203,204,205,206,207,
235      208,209,210,211,212,213,214,215,
236      216,217,218,219,220,221,222,223,
237      224,225,226,227,228,229,230,231,
238      232,233,234,235,236,237,238,239,
239      240,241,242,243,244,245,246,247,
240      248,249,250,251,252,253,254,255,
241    
242    /* This table is a case flipping table. */
243    
244        0,  1,  2,  3,  4,  5,  6,  7,
245        8,  9, 10, 11, 12, 13, 14, 15,
246       16, 17, 18, 19, 20, 21, 22, 23,
247       24, 25, 26, 27, 28, 29, 30, 31,
248       32, 33, 34, 35, 36, 37, 38, 39,
249       40, 41, 42, 43, 44, 45, 46, 47,
250       48, 49, 50, 51, 52, 53, 54, 55,
251       56, 57, 58, 59, 60, 61, 62, 63,
252       64, 97, 98, 99,100,101,102,103,
253      104,105,106,107,108,109,110,111,
254      112,113,114,115,116,117,118,119,
255      120,121,122, 91, 92, 93, 94, 95,
256       96, 65, 66, 67, 68, 69, 70, 71,
257       72, 73, 74, 75, 76, 77, 78, 79,
258       80, 81, 82, 83, 84, 85, 86, 87,
259       88, 89, 90,123,124,125,126,127,
260      128,129,130,131,132,133,134,135,
261      136,137,138,139,140,141,142,143,
262      144,145,146,147,148,149,150,151,
263      152,153,154,155,156,157,158,159,
264      160,161,162,163,164,165,166,167,
265      168,169,170,171,172,173,174,175,
266      176,177,178,179,180,181,182,183,
267      184,185,186,187,188,189,190,191,
268      192,193,194,195,196,197,198,199,
269      200,201,202,203,204,205,206,207,
270      208,209,210,211,212,213,214,215,
271      216,217,218,219,220,221,222,223,
272      224,225,226,227,228,229,230,231,
273      232,233,234,235,236,237,238,239,
274      240,241,242,243,244,245,246,247,
275      248,249,250,251,252,253,254,255,
276    
277    /* This table contains bit maps for various character classes. Each map is 32
278    bytes long and the bits run from the least significant end of each byte. The
279    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
280    graph, print, punct, and cntrl. Other classes are built from combinations. */
281    
282      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
283      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
284      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
285      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
286    
287      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
288      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
289      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
290      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
291    
292      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
293      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
294      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
295      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
296    
297      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
298      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
299      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
300      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
301    
302      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
303      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
304      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
305      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
306    
307      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
308      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
309      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
310      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
311    
312      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
313      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
314      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
315      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
316    
317      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
318      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
319      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
320      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
321    
322      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
323      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
324      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
325      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
326    
327      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
328      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
329      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
330      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
331    
332    /* This table identifies various classes of character by individual bits:
333      0x01   white space character
334      0x02   letter
335      0x04   decimal digit
336      0x08   hexadecimal digit
337      0x10   alphanumeric or '_'
338      0x80   regular expression metacharacter or binary zero
339    */
340    
341      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
342      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
343      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
344      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
345      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
346      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
347      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
348      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
349      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
350      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
351      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
352      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
353      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
354      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
355      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
356      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
357      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
358      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
359      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
360      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
361      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
362      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
363      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
364      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
365      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
366      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
367      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
368      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
369      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
370      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
371      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
372      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
373    
374    /* This is a set of tables that came originally from a Windows user. It seems to
375    be at least an approximation of ISO 8859. In particular, there are characters
376    greater than 128 that are marked as spaces, letters, etc. */
377    
378    static const unsigned char tables1[] = {
379    0,1,2,3,4,5,6,7,
380    8,9,10,11,12,13,14,15,
381    16,17,18,19,20,21,22,23,
382    24,25,26,27,28,29,30,31,
383    32,33,34,35,36,37,38,39,
384    40,41,42,43,44,45,46,47,
385    48,49,50,51,52,53,54,55,
386    56,57,58,59,60,61,62,63,
387    64,97,98,99,100,101,102,103,
388    104,105,106,107,108,109,110,111,
389    112,113,114,115,116,117,118,119,
390    120,121,122,91,92,93,94,95,
391    96,97,98,99,100,101,102,103,
392    104,105,106,107,108,109,110,111,
393    112,113,114,115,116,117,118,119,
394    120,121,122,123,124,125,126,127,
395    128,129,130,131,132,133,134,135,
396    136,137,138,139,140,141,142,143,
397    144,145,146,147,148,149,150,151,
398    152,153,154,155,156,157,158,159,
399    160,161,162,163,164,165,166,167,
400    168,169,170,171,172,173,174,175,
401    176,177,178,179,180,181,182,183,
402    184,185,186,187,188,189,190,191,
403    224,225,226,227,228,229,230,231,
404    232,233,234,235,236,237,238,239,
405    240,241,242,243,244,245,246,215,
406    248,249,250,251,252,253,254,223,
407    224,225,226,227,228,229,230,231,
408    232,233,234,235,236,237,238,239,
409    240,241,242,243,244,245,246,247,
410    248,249,250,251,252,253,254,255,
411    0,1,2,3,4,5,6,7,
412    8,9,10,11,12,13,14,15,
413    16,17,18,19,20,21,22,23,
414    24,25,26,27,28,29,30,31,
415    32,33,34,35,36,37,38,39,
416    40,41,42,43,44,45,46,47,
417    48,49,50,51,52,53,54,55,
418    56,57,58,59,60,61,62,63,
419    64,97,98,99,100,101,102,103,
420    104,105,106,107,108,109,110,111,
421    112,113,114,115,116,117,118,119,
422    120,121,122,91,92,93,94,95,
423    96,65,66,67,68,69,70,71,
424    72,73,74,75,76,77,78,79,
425    80,81,82,83,84,85,86,87,
426    88,89,90,123,124,125,126,127,
427    128,129,130,131,132,133,134,135,
428    136,137,138,139,140,141,142,143,
429    144,145,146,147,148,149,150,151,
430    152,153,154,155,156,157,158,159,
431    160,161,162,163,164,165,166,167,
432    168,169,170,171,172,173,174,175,
433    176,177,178,179,180,181,182,183,
434    184,185,186,187,188,189,190,191,
435    224,225,226,227,228,229,230,231,
436    232,233,234,235,236,237,238,239,
437    240,241,242,243,244,245,246,215,
438    248,249,250,251,252,253,254,223,
439    192,193,194,195,196,197,198,199,
440    200,201,202,203,204,205,206,207,
441    208,209,210,211,212,213,214,247,
442    216,217,218,219,220,221,222,255,
443    0,62,0,0,1,0,0,0,
444    0,0,0,0,0,0,0,0,
445    32,0,0,0,1,0,0,0,
446    0,0,0,0,0,0,0,0,
447    0,0,0,0,0,0,255,3,
448    126,0,0,0,126,0,0,0,
449    0,0,0,0,0,0,0,0,
450    0,0,0,0,0,0,0,0,
451    0,0,0,0,0,0,255,3,
452    0,0,0,0,0,0,0,0,
453    0,0,0,0,0,0,12,2,
454    0,0,0,0,0,0,0,0,
455    0,0,0,0,0,0,0,0,
456    254,255,255,7,0,0,0,0,
457    0,0,0,0,0,0,0,0,
458    255,255,127,127,0,0,0,0,
459    0,0,0,0,0,0,0,0,
460    0,0,0,0,254,255,255,7,
461    0,0,0,0,0,4,32,4,
462    0,0,0,128,255,255,127,255,
463    0,0,0,0,0,0,255,3,
464    254,255,255,135,254,255,255,7,
465    0,0,0,0,0,4,44,6,
466    255,255,127,255,255,255,127,255,
467    0,0,0,0,254,255,255,255,
468    255,255,255,255,255,255,255,127,
469    0,0,0,0,254,255,255,255,
470    255,255,255,255,255,255,255,255,
471    0,2,0,0,255,255,255,255,
472    255,255,255,255,255,255,255,127,
473    0,0,0,0,255,255,255,255,
474    255,255,255,255,255,255,255,255,
475    0,0,0,0,254,255,0,252,
476    1,0,0,248,1,0,0,120,
477    0,0,0,0,254,255,255,255,
478    0,0,128,0,0,0,128,0,
479    255,255,255,255,0,0,0,0,
480    0,0,0,0,0,0,0,128,
481    255,255,255,255,0,0,0,0,
482    0,0,0,0,0,0,0,0,
483    128,0,0,0,0,0,0,0,
484    0,1,1,0,1,1,0,0,
485    0,0,0,0,0,0,0,0,
486    0,0,0,0,0,0,0,0,
487    1,0,0,0,128,0,0,0,
488    128,128,128,128,0,0,128,0,
489    28,28,28,28,28,28,28,28,
490    28,28,0,0,0,0,0,128,
491    0,26,26,26,26,26,26,18,
492    18,18,18,18,18,18,18,18,
493    18,18,18,18,18,18,18,18,
494    18,18,18,128,128,0,128,16,
495    0,26,26,26,26,26,26,18,
496    18,18,18,18,18,18,18,18,
497    18,18,18,18,18,18,18,18,
498    18,18,18,128,128,0,0,0,
499    0,0,0,0,0,1,0,0,
500    0,0,0,0,0,0,0,0,
501    0,0,0,0,0,0,0,0,
502    0,0,0,0,0,0,0,0,
503    1,0,0,0,0,0,0,0,
504    0,0,18,0,0,0,0,0,
505    0,0,20,20,0,18,0,0,
506    0,20,18,0,0,0,0,0,
507    18,18,18,18,18,18,18,18,
508    18,18,18,18,18,18,18,18,
509    18,18,18,18,18,18,18,0,
510    18,18,18,18,18,18,18,18,
511    18,18,18,18,18,18,18,18,
512    18,18,18,18,18,18,18,18,
513    18,18,18,18,18,18,18,0,
514    18,18,18,18,18,18,18,18
515    };
516    
517    
518    
519    
520    #ifndef HAVE_STRERROR
521    /*************************************************
522    *     Provide strerror() for non-ANSI libraries  *
523    *************************************************/
524    
/* Fallback for old systems (SunOS4 is one example) whose C library has no
strerror(). The message is looked up in the traditional sys_errlist[] table,
which holds sys_nerr entries.

Argument:   n    an errno value
Returns:    the matching entry of sys_errlist[], or a fixed string when n is
            outside the range 0 .. sys_nerr-1
*/

extern int   sys_nerr;
extern char *sys_errlist[];

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
538    #endif /* HAVE_STRERROR */
539    
540    
541    
542    
543    /*************************************************
544    *        Read or extend an input line            *
545    *************************************************/
546    
547    /* Input lines are read into buffer, but both patterns and data lines can be
548    continued over multiple input lines. In addition, if the buffer fills up, we
549    want to automatically expand it so as to be able to handle extremely large
550    lines that are needed for certain stress tests. When the input buffer is
551    expanded, the other two buffers must also be expanded likewise, and the
552    contents of pbuffer, which are a copy of the input for callouts, must be
553    preserved (for when expansion happens for a data line). This is not the most
554    optimal way of handling this, but hey, this is just a test program!
555    
556    Arguments:
557      f            the file to read
558      start        where in buffer to start (this *must* be within buffer)
559      prompt       for stdin or readline()
560    
561    Returns:       pointer to the start of new data
562                   could be a copy of start, or could be moved
563                   NULL if no data read and EOF reached
564    */
565    
566    static uschar *
567    extend_inputline(FILE *f, uschar *start, const char *prompt)
568    {
569    uschar *here = start;
570    
571    for (;;)
572      {
573      int rlen = (int)(buffer_size - (here - buffer));
574    
575      if (rlen > 1000)
576        {
577        int dlen;
578    
579        /* If libreadline support is required, use readline() to read a line if the
580        input is a terminal. Note that readline() removes the trailing newline, so
581        we must put it back again, to be compatible with fgets(). */
582    
583    #ifdef SUPPORT_LIBREADLINE
584        if (isatty(fileno(f)))
585          {
586          size_t len;
587          char *s = readline(prompt);
588          if (s == NULL) return (here == start)? NULL : start;
589          len = strlen(s);
590          if (len > 0) add_history(s);
591          if (len > rlen - 1) len = rlen - 1;
592          memcpy(here, s, len);
593          here[len] = '\n';
594          here[len+1] = 0;
595          free(s);
596          }
597        else
598    #endif
599    
600        /* Read the next line by normal means, prompting if the file is stdin. */
601    
602          {
603          if (f == stdin) printf("%s", prompt);
604          if (fgets((char *)here, rlen,  f) == NULL)
605            return (here == start)? NULL : start;
606          }
607    
608        dlen = (int)strlen((char *)here);
609        if (dlen > 0 && here[dlen - 1] == '\n') return start;
610        here += dlen;
611        }
612    
613      else
614        {
615        int new_buffer_size = 2*buffer_size;
616        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
617        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
618        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
619    
620        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
621          {
622          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
623          exit(1);
624          }
625    
626        memcpy(new_buffer, buffer, buffer_size);
627        memcpy(new_pbuffer, pbuffer, buffer_size);
628    
629        buffer_size = new_buffer_size;
630    
631        start = new_buffer + (start - buffer);
632        here = new_buffer + (here - buffer);
633    
634        free(buffer);
635        free(dbuffer);
636        free(pbuffer);
637    
638        buffer = new_buffer;
639        dbuffer = new_dbuffer;
640        pbuffer = new_pbuffer;
641        }
642      }
643    
644    return NULL;  /* Control never gets here */
645    }
646    
647    
648    
649    
650    
651    
652    
653  /*************************************************  /*************************************************
654  *          Read number from string               *  *          Read number from string               *
# Line 129  static uschar *pbuffer = NULL; Line 656  static uschar *pbuffer = NULL;
656    
657  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
658  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
659  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
660    
661  Arguments:  Arguments:
662    str           string to be converted    str           string to be converted
# Line 159  return(result); Line 686  return(result);
686  and returns the value of the character.  and returns the value of the character.
687    
688  Argument:  Argument:
689    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
690    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
691    
692  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
693             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
694  */  */
695    
696  #if !defined NOUTF8  #if !defined NOUTF8
697    
698  static int  static int
699  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
700  {  {
701  int c = *buffer++;  int c = *utf8bytes++;
702  int d = c;  int d = c;
703  int i, j, s;  int i, j, s;
704    
# Line 191  d = (c & utf8_table3[i]) << s; Line 718  d = (c & utf8_table3[i]) << s;
718    
719  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
720    {    {
721    c = *buffer++;    c = *utf8bytes++;
722    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
723    s -= 6;    s -= 6;
724    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 222  and encodes it as a UTF-8 character in 0 Line 749  and encodes it as a UTF-8 character in 0
749    
750  Arguments:  Arguments:
751    cvalue     the character value    cvalue     the character value
752    buffer     pointer to buffer for result - at least 6 bytes long    utf8bytes  pointer to buffer for result - at least 6 bytes long
753    
754  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
755  */  */
756    
757    #if !defined NOUTF8
758    
759  static int  static int
760  ord2utf8(int cvalue, uschar *buffer)  ord2utf8(int cvalue, uschar *utf8bytes)
761  {  {
762  register int i, j;  register int i, j;
763  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
764    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
765  buffer += i;  utf8bytes += i;
766  for (j = i; j > 0; j--)  for (j = i; j > 0; j--)
767   {   {
768   *buffer-- = 0x80 | (cvalue & 0x3f);   *utf8bytes-- = 0x80 | (cvalue & 0x3f);
769   cvalue >>= 6;   cvalue >>= 6;
770   }   }
771  *buffer = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
772  return i + 1;  return i + 1;
773  }  }
774    
775    #endif
776    
777    
778    
779  /*************************************************  /*************************************************
# Line 269  while (length-- > 0) Line 800  while (length-- > 0)
800        {        {
801        length -= rc - 1;        length -= rc - 1;
802        p += rc;        p += rc;
803        if (c < 256 && isprint(c))        if (PRINTHEX(c))
804          {          {
805          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
806          yield++;          yield++;
807          }          }
808        else        else
809          {          {
810          int n;          int n = 4;
811          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
812          yield += n;          yield += (n <= 0x000000ff)? 2 :
813                     (n <= 0x00000fff)? 3 :
814                     (n <= 0x0000ffff)? 4 :
815                     (n <= 0x000fffff)? 5 : 6;
816          }          }
817        continue;        continue;
818        }        }
# Line 287  while (length-- > 0) Line 821  while (length-- > 0)
821    
822     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
823    
824    if (isprint(c = *(p++)))    c = *p++;
825      if (PRINTHEX(c))
826      {      {
827      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
828      yield++;      yield++;
# Line 461  if ((rc = pcre_fullinfo(re, study, optio Line 996  if ((rc = pcre_fullinfo(re, study, optio
996  *         Byte flipping function                 *  *         Byte flipping function                 *
997  *************************************************/  *************************************************/
998    
999  static long int  static unsigned long int
1000  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
1001  {  {
1002  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
1003  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 526  return count; Line 1061  return count;
1061    
1062    
1063  /*************************************************  /*************************************************
1064    *         Case-independent strncmp() function    *
1065    *************************************************/
1066    
1067    /* Compare up to n characters of s and t, folding each through tolower().
1068    Arguments:
1069      s         first string
1070      t         second string
1071      n         number of characters to compare
1072    The loop stops at the first difference or after n characters; it has no test for a terminating zero.
1073    Returns:    < 0, = 0, or > 0, according to the comparison
1074    */
1075    
1076    static int
1077    strncmpic(uschar *s, uschar *t, int n)
1078    {
1079    while (n--)
1080      {
1081      int c = tolower(*s++) - tolower(*t++);
1082      if (c) return c;
1083      }
1084    return 0;
1085    }
1086    
1087    
1088    
1089    /*************************************************
1090    *         Check newline indicator                *
1091    *************************************************/
1092    
1093    /* This is used both at compile and run-time to check for <xxx> escapes. Print
1094    a message and return 0 if there is no match.
1095    The name is compared case-independently (via strncmpic) and must include the closing '>'.
1096    Arguments:
1097      p           points after the leading '<'
1098      f           file for error message
1099    
1100    Returns:      the matching PCRE_NEWLINE_xxx or PCRE_BSR_xxx flag, or 0
1101    */
1102    
1103    static int
1104    check_newline(uschar *p, FILE *f)
1105    {
1106    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1107    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1108    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1109    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1110    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1111    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1112    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1113    fprintf(f, "Unknown newline type at: <%s\n", p);
1114    return 0;
1115    }
1116    
1117    
1118    
1119    /*************************************************
1120    *             Usage function                     *
1121    *************************************************/
1122    /* Print the command synopsis and option list with printf(). The readline, -dfa and -p lines depend on SUPPORT_LIBREADLINE, NODFA and NOPOSIX. */
1123    static void
1124    usage(void)
1125    {
1126    printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1127    printf("Input and output default to stdin and stdout.\n");
1128    #ifdef SUPPORT_LIBREADLINE
1129    printf("If input is a terminal, readline() is used to read from it.\n");
1130    #else
1131    printf("This version of pcretest is not linked with readline().\n");
1132    #endif
1133    printf("\nOptions:\n");
1134    printf("  -b       show compiled code (bytecode)\n");
1135    printf("  -C       show PCRE compile-time options and exit\n");
1136    printf("  -d       debug: show compiled code and information (-b and -i)\n");
1137    #if !defined NODFA
1138    printf("  -dfa     force DFA matching for all subjects\n");
1139    #endif
1140    printf("  -help    show usage information\n");
1141    printf("  -i       show information about compiled patterns\n"
1142           "  -M       find MATCH_LIMIT minimum for each subject\n"
1143           "  -m       output memory used information\n"
1144           "  -o <n>   set size of offsets vector to <n>\n");
1145    #if !defined NOPOSIX
1146    printf("  -p       use POSIX interface\n");
1147    #endif
1148    printf("  -q       quiet: do not output PCRE version number at start\n");
1149    printf("  -S <n>   set stack size to <n> megabytes\n");
1150    printf("  -s       output store (memory) used information\n"
1151           "  -t       time compilation and execution\n");
1152    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1153    printf("  -tm      time execution (matching) only\n");
1154    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
1155    }
1156    
1157    
1158    
1159    /*************************************************
1160  *                Main Program                    *  *                Main Program                    *
1161  *************************************************/  *************************************************/
1162    
# Line 538  int main(int argc, char **argv) Line 1169  int main(int argc, char **argv)
1169  FILE *infile = stdin;  FILE *infile = stdin;
1170  int options = 0;  int options = 0;
1171  int study_options = 0;  int study_options = 0;
1172    int default_find_match_limit = FALSE;
1173  int op = 1;  int op = 1;
1174  int timeit = 0;  int timeit = 0;
1175    int timeitm = 0;
1176  int showinfo = 0;  int showinfo = 0;
1177  int showstore = 0;  int showstore = 0;
1178  int quiet = 0;  int quiet = 0;
# Line 553  int debug = 0; Line 1186  int debug = 0;
1186  int done = 0;  int done = 0;
1187  int all_use_dfa = 0;  int all_use_dfa = 0;
1188  int yield = 0;  int yield = 0;
1189    int stack_size;
1190    
1191    /* These vectors store, end-to-end, a list of captured substring names. Assume
1192    that 1024 is plenty long enough for the few names we'll be testing. */
1193    
1194  unsigned char *buffer;  uschar copynames[1024];
1195  unsigned char *dbuffer;  uschar getnames[1024];
1196    
1197    uschar *copynamesptr;
1198    uschar *getnamesptr;
1199    
1200  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
1201  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
1202    
1203  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
1204  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
1205  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
1206    
1207  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
1208    
1209  outfile = stdout;  outfile = stdout;
1210    
1211    /* The following _setmode() stuff is some Windows magic that puts stdout into
1212    binary (untranslated) mode, so that LF is not expanded to CRLF on output. At
1213    least, that's what I've been told: never having used Windows I take this all on
1214    trust. Originally it set 0x8000, but then I was advised that _O_BINARY was better. */
1215    
1216    #if defined(_WIN32) || defined(WIN32)
1217    _setmode( _fileno( stdout ), _O_BINARY );
1218    #endif
1219    
1220  /* Scan options */  /* Scan options */
1221    
1222  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 582  while (argc > 1 && argv[op][0] == '-') Line 1225  while (argc > 1 && argv[op][0] == '-')
1225    
1226    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
1227      showstore = 1;      showstore = 1;
   else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
1228    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1229      else if (strcmp(argv[op], "-b") == 0) debug = 1;
1230    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1231    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1232      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1233  #if !defined NODFA  #if !defined NODFA
1234    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1235  #endif  #endif
# Line 596  while (argc > 1 && argv[op][0] == '-') Line 1240  while (argc > 1 && argv[op][0] == '-')
1240      op++;      op++;
1241      argc--;      argc--;
1242      }      }
1243      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
1244        {
1245        int both = argv[op][2] == 0;
1246        int temp;
1247        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
1248                         *endptr == 0))
1249          {
1250          timeitm = temp;
1251          op++;
1252          argc--;
1253          }
1254        else timeitm = LOOPREPEAT;
1255        if (both) timeit = timeitm;
1256        }
1257      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1258          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
1259            *endptr == 0))
1260        {
1261    #if defined(_WIN32) || defined(WIN32)
1262        printf("PCRE: -S not supported on this OS\n");
1263        exit(1);
1264    #else
1265        int rc;
1266        struct rlimit rlim;
1267        getrlimit(RLIMIT_STACK, &rlim);
1268        rlim.rlim_cur = stack_size * 1024 * 1024;
1269        rc = setrlimit(RLIMIT_STACK, &rlim);
1270        if (rc != 0)
1271          {
1272        printf("PCRE: setrlimit() failed with error %d\n", rc);
1273        exit(1);
1274          }
1275        op++;
1276        argc--;
1277    #endif
1278        }
1279  #if !defined NOPOSIX  #if !defined NOPOSIX
1280    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
1281  #endif  #endif
1282    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1283      {      {
1284      int rc;      int rc;
1285        unsigned long int lrc;
1286      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1287      printf("Compiled with\n");      printf("Compiled with\n");
1288      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
# Line 609  while (argc > 1 && argv[op][0] == '-') Line 1290  while (argc > 1 && argv[op][0] == '-')
1290      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1291      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1292      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1293      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
1294        in EBCDIC environments. CR is 13 and NL is 10. */
1295        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1296          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1297          (rc == -2)? "ANYCRLF" :
1298          (rc == -1)? "ANY" : "???");
1299        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1300        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1301                                         "all Unicode newlines");
1302      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1303      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1304      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1305      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1306      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1307      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1308      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1309      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
1310      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1311      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1312      exit(0);      goto EXIT;
1313        }
1314      else if (strcmp(argv[op], "-help") == 0 ||
1315               strcmp(argv[op], "--help") == 0)
1316        {
1317        usage();
1318        goto EXIT;
1319      }      }
1320    else    else
1321      {      {
1322      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
1323      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
1324      yield = 1;      yield = 1;
1325      goto EXIT;      goto EXIT;
1326      }      }
# Line 653  offsets = (int *)malloc(size_offsets_max Line 1335  offsets = (int *)malloc(size_offsets_max
1335  if (offsets == NULL)  if (offsets == NULL)
1336    {    {
1337    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
1338      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
1339    yield = 1;    yield = 1;
1340    goto EXIT;    goto EXIT;
1341    }    }
# Line 662  if (offsets == NULL) Line 1344  if (offsets == NULL)
1344    
1345  if (argc > 1)  if (argc > 1)
1346    {    {
1347    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
1348    if (infile == NULL)    if (infile == NULL)
1349      {      {
1350      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 673  if (argc > 1) Line 1355  if (argc > 1)
1355    
1356  if (argc > 2)  if (argc > 2)
1357    {    {
1358    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1359    if (outfile == NULL)    if (outfile == NULL)
1360      {      {
1361      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 706  while (!done) Line 1388  while (!done)
1388  #endif  #endif
1389    
1390    const char *error;    const char *error;
1391      unsigned char *markptr;
1392    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1393    unsigned char *to_file = NULL;    unsigned char *to_file = NULL;
1394    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1395    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1396    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1397      int do_mark = 0;
1398    int do_study = 0;    int do_study = 0;
1399    int do_debug = debug;    int do_debug = debug;
1400    int do_G = 0;    int do_G = 0;
# Line 718  while (!done) Line 1402  while (!done)
1402    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1403    int do_showrest = 0;    int do_showrest = 0;
1404    int do_flip = 0;    int do_flip = 0;
1405    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1406    
1407    use_utf8 = 0;    use_utf8 = 0;
1408      debug_lengths = 1;
1409    
1410    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;  
1411    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1412    fflush(outfile);    fflush(outfile);
1413    
# Line 735  while (!done) Line 1419  while (!done)
1419    
1420    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1421      {      {
1422      unsigned long int magic;      unsigned long int magic, get_options;
1423      uschar sbuf[8];      uschar sbuf[8];
1424      FILE *f;      FILE *f;
1425    
# Line 783  while (!done) Line 1467  while (!done)
1467    
1468      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1469    
1470      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1471      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1472    
1473      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1474    
# Line 823  while (!done) Line 1507  while (!done)
1507    
1508    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1509      {      {
1510      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1511      goto SKIP_DATA;      goto SKIP_DATA;
1512      }      }
1513    
1514    pp = p;    pp = p;
1515      poffset = (int)(p - buffer);
1516    
1517    for(;;)    for(;;)
1518      {      {
# Line 838  while (!done) Line 1523  while (!done)
1523        pp++;        pp++;
1524        }        }
1525      if (*pp != 0) break;      if (*pp != 0) break;
1526        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1527        {        {
1528        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1529        done = 1;        done = 1;
# Line 856  while (!done) Line 1532  while (!done)
1532      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1533      }      }
1534    
1535      /* The buffer may have moved while being extended; reset the start of data
1536      pointer to the correct relative point in the buffer. */
1537    
1538      p = buffer + poffset;
1539    
1540    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1541    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1542    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 887  while (!done) Line 1568  while (!done)
1568    
1569        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1570        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1571          case 'B': do_debug = 1; break;
1572        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1573        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1574        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1575        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1576        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1577        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1578          case 'J': options |= PCRE_DUPNAMES; break;
1579          case 'K': do_mark = 1; break;
1580        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1581        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1582    
# Line 902  while (!done) Line 1586  while (!done)
1586    
1587        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1588        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1589          case 'W': options |= PCRE_UCP; break;
1590        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1591          case 'Z': debug_lengths = 0; break;
1592        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1593        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1594    
1595          case 'T':
1596          switch (*pp++)
1597            {
1598            case '0': tables = tables0; break;
1599            case '1': tables = tables1; break;
1600    
1601            case '\r':
1602            case '\n':
1603            case ' ':
1604            case 0:
1605            fprintf(outfile, "** Missing table number after /T\n");
1606            goto SKIP_DATA;
1607    
1608            default:
1609            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
1610            goto SKIP_DATA;
1611            }
1612          break;
1613    
1614        case 'L':        case 'L':
1615        ppp = pp;        ppp = pp;
1616        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1617        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The 0 (NUL) test is just in case this is an unterminated line. */
1618          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1619        *ppp = 0;        *ppp = 0;
1620        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1621          {          {
1622          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1623          goto SKIP_DATA;          goto SKIP_DATA;
1624          }          }
1625          locale_set = 1;
1626        tables = pcre_maketables();        tables = pcre_maketables();
1627        pp = ppp;        pp = ppp;
1628        break;        break;
# Line 927  while (!done) Line 1634  while (!done)
1634        *pp = 0;        *pp = 0;
1635        break;        break;
1636    
1637          case '<':
1638            {
1639            if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1640              {
1641              options |= PCRE_JAVASCRIPT_COMPAT;
1642              pp += 3;
1643              }
1644            else
1645              {
1646              int x = check_newline(pp, outfile);
1647              if (x == 0) goto SKIP_DATA;
1648              options |= x;
1649              while (*pp++ != '>');
1650              }
1651            }
1652          break;
1653    
1654        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1655        case '\n':        case '\n':
1656        case ' ':        case ' ':
# Line 953  while (!done) Line 1677  while (!done)
1677      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1678      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1679      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1680        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1681        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1682    
1683      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1684    
# Line 961  while (!done) Line 1687  while (!done)
1687    
1688      if (rc != 0)      if (rc != 0)
1689        {        {
1690        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1691        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1692        goto SKIP_DATA;        goto SKIP_DATA;
1693        }        }
# Line 973  while (!done) Line 1699  while (!done)
1699  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1700    
1701      {      {
1702      if (timeit)      unsigned long int get_options;
1703    
1704        if (timeit > 0)
1705        {        {
1706        register int i;        register int i;
1707        clock_t time_taken;        clock_t time_taken;
1708        clock_t start_time = clock();        clock_t start_time = clock();
1709        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1710          {          {
1711          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1712          if (re != NULL) free(re);          if (re != NULL) free(re);
1713          }          }
1714        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1715        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1716          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1717            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1718        }        }
1719    
# Line 1002  while (!done) Line 1730  while (!done)
1730          {          {
1731          for (;;)          for (;;)
1732            {            {
1733            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1734              {              {
1735              done = 1;              done = 1;
1736              goto CONTINUE;              goto CONTINUE;
# Line 1016  while (!done) Line 1744  while (!done)
1744        goto CONTINUE;        goto CONTINUE;
1745        }        }
1746    
1747      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
1748      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
1749      returns only limited data. Check that it agrees with the newer one. */      lines. */
1750    
1751        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1752        if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1753    
1754        /* Print information if required. There are now two info-returning
1755        functions. The old one has a limited interface and returns only limited
1756        data. Check that it agrees with the newer one. */
1757    
1758      if (log_store)      if (log_store)
1759        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
# Line 1037  while (!done) Line 1772  while (!done)
1772    
1773      if (do_study)      if (do_study)
1774        {        {
1775        if (timeit)        if (timeit > 0)
1776          {          {
1777          register int i;          register int i;
1778          clock_t time_taken;          clock_t time_taken;
1779          clock_t start_time = clock();          clock_t start_time = clock();
1780          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1781            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1782          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1783          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1784          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1785            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1786              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1787          }          }
1788        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1057  while (!done) Line 1792  while (!done)
1792          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1793        }        }
1794    
1795        /* If /K was present, we set up for handling MARK data. */
1796    
1797        if (do_mark)
1798          {
1799          if (extra == NULL)
1800            {
1801            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1802            extra->flags = 0;
1803            }
1804          extra->mark = &markptr;
1805          extra->flags |= PCRE_EXTRA_MARK;
1806          }
1807    
1808      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
1809      fields in the regex data block and the study block. This is to make it      fields in the regex data block and the study block. This is to make it
1810      possible to test PCRE's handling of byte-flipped patterns, e.g. those      possible to test PCRE's handling of byte-flipped patterns, e.g. those
# Line 1065  while (!done) Line 1813  while (!done)
1813      if (do_flip)      if (do_flip)
1814        {        {
1815        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
1816        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
1817            byteflip(rre->magic_number, sizeof(rre->magic_number));
1818        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1819        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1820        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1821        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
1822        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1823        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
1824        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1825          rre->first_byte =
1826            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1827          rre->req_byte =
1828            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1829          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1830          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
1831        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1832          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
1833        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1834            sizeof(rre->name_count));
1835    
1836        if (extra != NULL)        if (extra != NULL)
1837          {          {
1838          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1839          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1840          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1841            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1842          }          }
1843        }        }
1844    
# Line 1090  while (!done) Line 1846  while (!done)
1846    
1847      SHOW_INFO:      SHOW_INFO:
1848    
1849        if (do_debug)
1850          {
1851          fprintf(outfile, "------------------------------------------------------------------\n");
1852          pcre_printint(re, outfile, debug_lengths);
1853          }
1854    
1855        /* We already have the options in get_options (see above) */
1856    
1857      if (do_showinfo)      if (do_showinfo)
1858        {        {
1859        unsigned long int get_options, all_options;        unsigned long int all_options;
1860  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1861        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1862  #endif  #endif
1863        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1864            hascrorlf;
1865        int nameentrysize, namecount;        int nameentrysize, namecount;
1866        const uschar *nametable;        const uschar *nametable;
1867    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
   
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
1868        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1869        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1870        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1115  while (!done) Line 1873  while (!done)
1873        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1874        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1875        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1876          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1877          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1878          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1879    
1880  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1881        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1156  while (!done) Line 1917  while (!done)
1917            }            }
1918          }          }
1919    
1920        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1921        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1922    
1923        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1924        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1925    
1926        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1927          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1928            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1929            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1930            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1931            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1932            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1933            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1934              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1935              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1936            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1937            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1938            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1939            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1940            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1941            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1942              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1943              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1944    
1945          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1946    
1947          switch (get_options & PCRE_NEWLINE_BITS)
1948            {
1949            case PCRE_NEWLINE_CR:
1950            fprintf(outfile, "Forced newline sequence: CR\n");
1951            break;
1952    
1953            case PCRE_NEWLINE_LF:
1954            fprintf(outfile, "Forced newline sequence: LF\n");
1955            break;
1956    
1957            case PCRE_NEWLINE_CRLF:
1958            fprintf(outfile, "Forced newline sequence: CRLF\n");
1959            break;
1960    
1961        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_ANYCRLF:
1962          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1963            break;
1964    
1965            case PCRE_NEWLINE_ANY:
1966            fprintf(outfile, "Forced newline sequence: ANY\n");
1967            break;
1968    
1969            default:
1970            break;
1971            }
1972    
1973        if (first_char == -1)        if (first_char == -1)
1974          {          {
1975          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1976          }          }
1977        else if (first_char < 0)        else if (first_char < 0)
1978          {          {
# Line 1199  while (!done) Line 1983  while (!done)
1983          int ch = first_char & 255;          int ch = first_char & 255;
1984          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1985            "" : " (caseless)";            "" : " (caseless)";
1986          if (isprint(ch))          if (PRINTHEX(ch))
1987            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1988          else          else
1989            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1214  while (!done) Line 1998  while (!done)
1998          int ch = need_char & 255;          int ch = need_char & 255;
1999          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
2000            "" : " (caseless)";            "" : " (caseless)";
2001          if (isprint(ch))          if (PRINTHEX(ch))
2002            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
2003          else          else
2004            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1232  while (!done) Line 2016  while (!done)
2016          else          else
2017            {            {
2018            uschar *start_bits = NULL;            uschar *start_bits = NULL;
2019            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2020    
2021              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2022              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2023    
2024              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2025            if (start_bits == NULL)            if (start_bits == NULL)
2026              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2027            else            else
2028              {              {
2029              int i;              int i;
# Line 1250  while (!done) Line 2038  while (!done)
2038                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2039                    c = 2;                    c = 2;
2040                    }                    }
2041                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
2042                    {                    {
2043                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2044                    c += 2;                    c += 2;
# Line 1282  while (!done) Line 2070  while (!done)
2070        else        else
2071          {          {
2072          uschar sbuf[8];          uschar sbuf[8];
2073          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (uschar)((true_size >> 24) & 255);
2074          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (uschar)((true_size >> 16) & 255);
2075          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (uschar)((true_size >>  8) & 255);
2076          sbuf[3] = (true_size)  & 255;          sbuf[3] = (uschar)((true_size) & 255);
2077    
2078          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (uschar)((true_study_size >> 24) & 255);
2079          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (uschar)((true_study_size >> 16) & 255);
2080          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (uschar)((true_study_size >>  8) & 255);
2081          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (uschar)((true_study_size) & 255);
2082    
2083          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2084              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1309  while (!done) Line 2097  while (!done)
2097                  strerror(errno));                  strerror(errno));
2098                }                }
2099              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
2100    
2101              }              }
2102            }            }
2103          fclose(f);          fclose(f);
# Line 1316  while (!done) Line 2105  while (!done)
2105    
2106        new_free(re);        new_free(re);
2107        if (extra != NULL) new_free(extra);        if (extra != NULL) new_free(extra);
2108        if (tables != NULL) new_free((void *)tables);        if (locale_set)
2109            {
2110            new_free((void *)tables);
2111            setlocale(LC_CTYPE, "C");
2112            locale_set = 0;
2113            }
2114        continue;  /* With next regex */        continue;  /* With next regex */
2115        }        }
2116      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1326  while (!done) Line 2120  while (!done)
2120    for (;;)    for (;;)
2121      {      {
2122      uschar *q;      uschar *q;
2123      uschar *bptr = dbuffer;      uschar *bptr;
2124      int *use_offsets = offsets;      int *use_offsets = offsets;
2125      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2126      int callout_data = 0;      int callout_data = 0;
2127      int callout_data_set = 0;      int callout_data_set = 0;
2128      int count, c;      int count, c;
2129      int copystrings = 0;      int copystrings = 0;
2130      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2131      int getstrings = 0;      int getstrings = 0;
2132      int getlist = 0;      int getlist = 0;
2133      int gmatched = 0;      int gmatched = 0;
2134      int start_offset = 0;      int start_offset = 0;
2135        int start_offset_sign = 1;
2136      int g_notempty = 0;      int g_notempty = 0;
2137      int use_dfa = 0;      int use_dfa = 0;
2138    
2139      options = 0;      options = 0;
2140    
2141        *copynames = 0;
2142        *getnames = 0;
2143    
2144        copynamesptr = copynames;
2145        getnamesptr = getnames;
2146    
2147      pcre_callout = callout;      pcre_callout = callout;
2148      first_callout = 1;      first_callout = 1;
2149      callout_extra = 0;      callout_extra = 0;
# Line 1351  while (!done) Line 2152  while (!done)
2152      callout_fail_id = -1;      callout_fail_id = -1;
2153      show_malloc = 0;      show_malloc = 0;
2154    
2155      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
2156      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
2157    
2158        len = 0;
2159        for (;;)
2160        {        {
2161        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
2162        goto CONTINUE;          {
2163            if (len > 0)    /* Reached EOF without hitting a newline */
2164              {
2165              fprintf(outfile, "\n");
2166              break;
2167              }
2168            done = 1;
2169            goto CONTINUE;
2170            }
2171          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2172          len = (int)strlen((char *)buffer);
2173          if (buffer[len-1] == '\n') break;
2174        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
2175    
     len = (int)strlen((char *)buffer);  
2176      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
2177      buffer[len] = 0;      buffer[len] = 0;
2178      if (len == 0) break;      if (len == 0) break;
# Line 1367  while (!done) Line 2180  while (!done)
2180      p = buffer;      p = buffer;
2181      while (isspace(*p)) p++;      while (isspace(*p)) p++;
2182    
2183      q = dbuffer;      bptr = q = dbuffer;
2184      while ((c = *p++) != 0)      while ((c = *p++) != 0)
2185        {        {
2186        int i = 0;        int i = 0;
# Line 1389  while (!done) Line 2202  while (!done)
2202          c -= '0';          c -= '0';
2203          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
2204            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2205    
2206    #if !defined NOUTF8
2207            if (use_utf8 && c > 255)
2208              {
2209              unsigned char buff8[8];
2210              int ii, utn;
2211              utn = ord2utf8(c, buff8);
2212              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2213              c = buff8[ii];   /* Last byte */
2214              }
2215    #endif
2216          break;          break;
2217    
2218          case 'x':          case 'x':
# Line 1406  while (!done) Line 2230  while (!done)
2230              {              {
2231              unsigned char buff8[8];              unsigned char buff8[8];
2232              int ii, utn;              int ii, utn;
2233              utn = ord2utf8(c, buff8);              if (use_utf8)
2234              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2235              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2236                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2237                  c = buff8[ii];   /* Last byte */
2238                  }
2239                else
2240                 {
2241                 if (c > 255)
2242                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2243                     "UTF-8 mode is not enabled.\n"
2244                     "** Truncation will probably give the wrong result.\n", c);
2245                 }
2246              p = pt + 1;              p = pt + 1;
2247              break;              break;
2248              }              }
# Line 1431  while (!done) Line 2265  while (!done)
2265          continue;          continue;
2266    
2267          case '>':          case '>':
2268            if (*p == '-')
2269              {
2270              start_offset_sign = -1;
2271              p++;
2272              }
2273          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2274            start_offset *= start_offset_sign;
2275          continue;          continue;
2276    
2277          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1450  while (!done) Line 2290  while (!done)
2290            }            }
2291          else if (isalnum(*p))          else if (isalnum(*p))
2292            {            {
2293            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
2294            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2295              *npp++ = 0;
2296            *npp = 0;            *npp = 0;
2297            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
2298            if (n < 0)            if (n < 0)
2299              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
2300            else copystrings |= 1 << n;            copynamesptr = npp;
2301            }            }
2302          else if (*p == '+')          else if (*p == '+')
2303            {            {
# Line 1504  while (!done) Line 2344  while (!done)
2344  #endif  #endif
2345            use_dfa = 1;            use_dfa = 1;
2346          continue;          continue;
2347    #endif
2348    
2349    #if !defined NODFA
2350          case 'F':          case 'F':
2351          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2352          continue;          continue;
# Line 1518  while (!done) Line 2360  while (!done)
2360            }            }
2361          else if (isalnum(*p))          else if (isalnum(*p))
2362            {            {
2363            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
2364            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2365              *npp++ = 0;
2366            *npp = 0;            *npp = 0;
2367            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
2368            if (n < 0)            if (n < 0)
2369              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
2370            else getstrings |= 1 << n;            getnamesptr = npp;
2371            }            }
2372          continue;          continue;
2373    
# Line 1538  while (!done) Line 2380  while (!done)
2380          continue;          continue;
2381    
2382          case 'N':          case 'N':
2383          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2384              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2385            else
2386              options |= PCRE_NOTEMPTY;
2387          continue;          continue;
2388    
2389          case 'O':          case 'O':
# Line 1551  while (!done) Line 2396  while (!done)
2396            if (offsets == NULL)            if (offsets == NULL)
2397              {              {
2398              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
2399                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
2400              yield = 1;              yield = 1;
2401              goto EXIT;              goto EXIT;
2402              }              }
# Line 1561  while (!done) Line 2406  while (!done)
2406          continue;          continue;
2407    
2408          case 'P':          case 'P':
2409          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2410              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2411            continue;
2412    
2413            case 'Q':
2414            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2415            if (extra == NULL)
2416              {
2417              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2418              extra->flags = 0;
2419              }
2420            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2421            extra->match_limit_recursion = n;
2422            continue;
2423    
2424            case 'q':
2425            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2426            if (extra == NULL)
2427              {
2428              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2429              extra->flags = 0;
2430              }
2431            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2432            extra->match_limit = n;
2433          continue;          continue;
2434    
2435  #if !defined NODFA  #if !defined NODFA
# Line 1574  while (!done) Line 2442  while (!done)
2442          show_malloc = 1;          show_malloc = 1;
2443          continue;          continue;
2444    
2445            case 'Y':
2446            options |= PCRE_NO_START_OPTIMIZE;
2447            continue;
2448    
2449          case 'Z':          case 'Z':
2450          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2451          continue;          continue;
# Line 1581  while (!done) Line 2453  while (!done)
2453          case '?':          case '?':
2454          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
2455          continue;          continue;
2456    
2457            case '<':
2458              {
2459              int x = check_newline(p, outfile);
2460              if (x == 0) goto NEXT_DATA;
2461              options |= x;
2462              while (*p++ != '>');
2463              }
2464            continue;
2465          }          }
2466        *q++ = c;        *q++ = c;
2467        }        }
2468      *q = 0;      *q = 0;
2469      len = q - dbuffer;      len = (int)(q - dbuffer);
2470    
2471        /* Move the data to the end of the buffer so that a read over the end of
2472        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2473        we are using the POSIX interface, we must include the terminating zero. */
2474    
2475    #if !defined NOPOSIX
2476        if (posix || do_posix)
2477          {
2478          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2479          bptr += buffer_size - len - 1;
2480          }
2481        else
2482    #endif
2483          {
2484          memmove(bptr + buffer_size - len, bptr, len);
2485          bptr += buffer_size - len;
2486          }
2487    
2488      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
2489        {        {
# Line 1606  while (!done) Line 2504  while (!done)
2504          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2505        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2506        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2507          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2508    
2509        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2510    
2511        if (rc != 0)        if (rc != 0)
2512          {          {
2513          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2514          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2515          }          }
2516        else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)        else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
# Line 1650  while (!done) Line 2549  while (!done)
2549    
2550      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2551        {        {
2552        if (timeit)        markptr = NULL;
2553    
2554          if (timeitm > 0)
2555          {          {
2556          register int i;          register int i;
2557          clock_t time_taken;          clock_t time_taken;
# Line 1660  while (!done) Line 2561  while (!done)
2561          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2562            {            {
2563            int workspace[1000];            int workspace[1000];
2564            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2565              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2566                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2567                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
2568            }            }
2569          else          else
2570  #endif  #endif
2571    
2572          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2573            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2574              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2575    
2576          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2577          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2578            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2579              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2580          }          }
2581    
# Line 1690  while (!done) Line 2591  while (!done)
2591            extra->flags = 0;            extra->flags = 0;
2592            }            }
2593    
2594          count = check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
2595            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
2596            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2597            PCRE_ERROR_MATCHLIMIT, "match()");            PCRE_ERROR_MATCHLIMIT, "match()");
# Line 1724  while (!done) Line 2625  while (!done)
2625        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
2626          {          {
2627          int workspace[1000];          int workspace[1000];
2628          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2629            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
2630            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
2631          if (count == 0)          if (count == 0)
# Line 1750  while (!done) Line 2651  while (!done)
2651    
2652        if (count >= 0)        if (count >= 0)
2653          {          {
2654          int i;          int i, maxcount;
2655    
2656    #if !defined NODFA
2657            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2658    #endif
2659              maxcount = use_size_offsets/3;
2660    
2661            /* This is a check against a lunatic return value. */
2662    
2663            if (count > maxcount)
2664              {
2665              fprintf(outfile,
2666                "** PCRE error: returned count %d is too big for offset size %d\n",
2667                count, use_size_offsets);
2668              count = use_size_offsets/3;
2669              if (do_g || do_G)
2670                {
2671                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2672                do_g = do_G = FALSE;        /* Break g/G loop */
2673                }
2674              }
2675    
2676          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2677            {            {
2678            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1774  while (!done) Line 2696  while (!done)
2696              }              }
2697            }            }
2698    
2699            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2700    
2701          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2702            {            {
2703            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2704              {              {
2705              char copybuffer[16];              char copybuffer[256];
2706              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2707                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2708              if (rc < 0)              if (rc < 0)
# Line 1788  while (!done) Line 2712  while (!done)
2712              }              }
2713            }            }
2714    
2715            for (copynamesptr = copynames;
2716                 *copynamesptr != 0;
2717                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2718              {
2719              char copybuffer[256];
2720              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2721                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2722              if (rc < 0)
2723                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2724              else
2725                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2726              }
2727    
2728          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2729            {            {
2730            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1800  while (!done) Line 2737  while (!done)
2737              else              else
2738                {                {
2739                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2740                pcre_free_substring(substring);                pcre_free_substring(substring);
2741                }                }
2742              }              }
2743            }            }
2744    
2745            for (getnamesptr = getnames;
2746                 *getnamesptr != 0;
2747                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2748              {
2749              const char *substring;
2750              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2751                count, (char *)getnamesptr, &substring);
2752              if (rc < 0)
2753                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2754              else
2755                {
2756                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2757                pcre_free_substring(substring);
2758                }
2759              }
2760    
2761          if (getlist)          if (getlist)
2762            {            {
2763            const char **stringlist;            const char **stringlist;
# Line 1829  while (!done) Line 2781  while (!done)
2781    
2782        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2783          {          {
2784          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
2785  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
2786          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
2787            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
2788              bptr + use_offsets[0]);            fprintf(outfile, ": ");
2789  #endif            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2790                outfile);
2791              }
2792          fprintf(outfile, "\n");          fprintf(outfile, "\n");
2793          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2794          }          }
2795    
2796        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2797        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2798        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2799        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2800        offset values to achieve this. We won't be at the end of the string -  
2801        that was checked before setting g_notempty. */        Complication arises in the case when the newline convention is "any",
2802          "crlf", or "anycrlf". If the previous match was at the end of a line
2803          terminated by CRLF, an advance of one character just passes the \r,
2804          whereas we should prefer the longer newline sequence, as does the code in
2805          pcre_exec(). Fudge the offset value to achieve this. We check for a
2806          newline setting in the pattern; if none was set, use pcre_config() to
2807          find the default.
2808    
2809          Otherwise, in the case of UTF-8 matching, the advance must be one
2810          character, not one byte. */
2811    
2812        else        else
2813          {          {
2814          if (g_notempty != 0)          if (g_notempty != 0)
2815            {            {
2816            int onechar = 1;            int onechar = 1;
2817              unsigned int obits = ((real_pcre *)re)->options;
2818            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2819            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2820                {
2821                int d;
2822                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2823                /* Note that these values are always the ASCII ones, even in
2824                EBCDIC environments. CR = 13, NL = 10. */
2825                obits = (d == 13)? PCRE_NEWLINE_CR :
2826                        (d == 10)? PCRE_NEWLINE_LF :
2827                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2828                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2829                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2830                }
2831              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2832                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
2833                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2834                  &&
2835                  start_offset < len - 1 &&
2836                  bptr[start_offset] == '\r' &&
2837                  bptr[start_offset+1] == '\n')
2838                onechar++;
2839              else if (use_utf8)
2840              {              {
2841              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2842                {                {
2843                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
2844                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
2845                }                }
2846              }              }
2847            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
# Line 1868  while (!done) Line 2850  while (!done)
2850            {            {
2851            if (count == PCRE_ERROR_NOMATCH)            if (count == PCRE_ERROR_NOMATCH)
2852              {              {
2853              if (gmatched == 0) fprintf(outfile, "No match\n");              if (gmatched == 0)
2854                  {
2855                  if (markptr == NULL) fprintf(outfile, "No match\n");
2856                    else fprintf(outfile, "No match, mark = %s\n", markptr);
2857                  }
2858              }              }
2859            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2860            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
# Line 1880  while (!done) Line 2866  while (!done)
2866        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
2867    
2868        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2869        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2870        what Perl's /g options does. This turns out to be rather cunning. First        Perl's /g options does. This turns out to be rather cunning. First we set
2871        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2872        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
2873        character. */        character. */
2874    
2875        g_notempty = 0;        g_notempty = 0;
2876    
2877        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2878          {          {
2879          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
2880          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2881          }          }
2882    
2883        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 1905  while (!done) Line 2892  while (!done)
2892          len -= use_offsets[1];          len -= use_offsets[1];
2893          }          }
2894        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2895    
2896        NEXT_DATA: continue;
2897      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2898    
2899    CONTINUE:    CONTINUE:
# Line 1915  while (!done) Line 2904  while (!done)
2904    
2905    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
2906    if (extra != NULL) new_free(extra);    if (extra != NULL) new_free(extra);
2907    if (tables != NULL)    if (locale_set)
2908      {      {
2909      new_free((void *)tables);      new_free((void *)tables);
2910      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2911        locale_set = 0;
2912      }      }
2913    }    }
2914    

Legend:
Removed from v.87  
changed lines
  Added in v.567

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12