/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory | Revision Log | View Patch

code/trunk/pcretest.c revision 227 by ph10, Tue Aug 21 15:00:15 2007 UTC code/branches/pcre16/pcretest.c revision 810 by ph10, Mon Dec 19 13:34:10 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 37  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38    
39    
40  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
41  #include <config.h>  #include "config.h"
42  #endif  #endif
43    
44  #include <ctype.h>  #include <ctype.h>
# Line 48  POSSIBILITY OF SUCH DAMAGE. Line 49  POSSIBILITY OF SUCH DAMAGE.
49  #include <locale.h>  #include <locale.h>
50  #include <errno.h>  #include <errno.h>
51    
52    #ifdef SUPPORT_LIBREADLINE
53    #ifdef HAVE_UNISTD_H
54    #include <unistd.h>
55    #endif
56    #include <readline/readline.h>
57    #include <readline/history.h>
58    #endif
59    
60    
61  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
62  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 63  input mode under Windows. */ Line 72  input mode under Windows. */
72  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
73  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
74    
75    #ifndef isatty
76    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
77    #endif                         /* though in some environments they seem to   */
78                                   /* be already defined, hence the #ifndefs.    */
79    #ifndef fileno
80    #define fileno _fileno
81    #endif
82    
83    /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85    #ifdef __BORLANDC__
86    #define _setmode(handle, mode) setmode(handle, mode)
87    #endif
88    
89    /* Not Windows */
90    
91  #else  #else
92  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
93  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 83  appropriately for an application, not fo Line 108  appropriately for an application, not fo
108  #include "pcre.h"  #include "pcre.h"
109  #include "pcre_internal.h"  #include "pcre_internal.h"
110    
111  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
112  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
113  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114    when that is compiled in debug mode. */
115    
116    #ifdef SUPPORT_PCRE8
117    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118    #endif
119    #ifdef SUPPORT_PCRE16
120    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121    #endif
122    
123    /* We need access to some of the data tables that PCRE uses. So as not to have
124    to keep two copies, we include the source file here, changing the names of the
125    external symbols to prevent clashes. */
126    
127    #define _pcre_ucp_gentype      ucp_gentype
128    #define _pcre_ucp_typerange    ucp_typerange
129  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
130  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
131  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 94  symbols to prevent clashes. */ Line 133  symbols to prevent clashes. */
133  #define _pcre_utf8_table4      utf8_table4  #define _pcre_utf8_table4      utf8_table4
134  #define _pcre_utt              utt  #define _pcre_utt              utt
135  #define _pcre_utt_size         utt_size  #define _pcre_utt_size         utt_size
136    #define _pcre_utt_names        utt_names
137  #define _pcre_OP_lengths       OP_lengths  #define _pcre_OP_lengths       OP_lengths
138    
139  #include "pcre_tables.c"  #include "pcre_tables.c"
140    
141  /* We also need the pcre_printint() function for printing out compiled  /* The definition of the macro PRINTABLE, which determines whether to print an
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled.  
   
 The definition of the macro PRINTABLE, which determines whether to print an  
142  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
143  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
144  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
145  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
146    
147  #include "pcre_printint.src"  #ifdef EBCDIC
148    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #else
150    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151    #endif
152    
153    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
156  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 121  Makefile. */ Line 160  Makefile. */
160  #include "pcreposix.h"  #include "pcreposix.h"
161  #endif  #endif
162    
163  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
164  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165  interface to the DFA matcher (NODFA), and without the doublecheck of the old  without the interface to the DFA matcher (NODFA), and without the doublecheck
166  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167  UTF8 support if PCRE is built without it. */  out the UTF8 support if PCRE is built without it. */
168    
169  #ifndef SUPPORT_UTF8  #ifndef SUPPORT_UTF8
170  #ifndef NOUTF8  #ifndef NOUTF8
# Line 133  UTF8 support if PCRE is built without it Line 172  UTF8 support if PCRE is built without it
172  #endif  #endif
173  #endif  #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    #define PCHARS8(lv, p, len, f) \
185      lv = pchars((pcre_uint8 *)p, len, f)
186    
187    #define PCHARSV8(p, len, f) \
188      (void)pchars((pcre_uint8 *)p, len, f)
189    
190    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191      re = pcre_compile((char *)pat, options, error, erroffset, tables)
192    
193    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194        offsets, size_offsets) \
195      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196        offsets, size_offsets)
197    
198    #define PCRE_STUDY8(extra, re, options, error) \
199      extra = pcre_study(re, options, error)
200    
201    #define PCRE_FREE_STUDY8(extra) \
202      pcre_free_study(extra)
203    
204    #endif /* SUPPORT_PCRE8 */
205    
206    
207    #ifdef SUPPORT_PCRE16
208    #define PCHARS16(lv, p, len, f) \
209      lv = pchars16((PCRE_SPTR16)p, len, f)
210    
211    #define PCHARSV16(p, len, f) \
212      (void)pchars16((PCRE_SPTR16)p, len, f)
213    
214    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
215      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
216    
217    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
218        offsets, size_offsets) \
219      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
220        options, offsets, size_offsets)
221    
222    #define PCRE_FREE_STUDY16(extra) \
223      pcre16_free_study(extra)
224    
225    #define PCRE_STUDY16(extra, re, options, error) \
226      extra = pcre16_study(re, options, error)
227    
228    #endif /* SUPPORT_PCRE16 */
229    
230    
231    /* ----- Both modes are supported; a runtime test is needed ----- */
232    
233    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
234    
235    #define PCHARS(lv, p, len, f) \
236      if (use_pcre16) \
237        PCHARS16(lv, p, len, f); \
238      else \
239        PCHARS8(lv, p, len, f)
240    
241    #define PCHARSV(p, len, f) \
242      if (use_pcre16) \
243        PCHARSV16(p, len, f); \
244      else \
245        PCHARSV8(p, len, f)
246    
247    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
248      if (use_pcre16) \
249        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
250      else \
251        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
252    
253    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
254        offsets, size_offsets) \
255      if (use_pcre16) \
256        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
257          offsets, size_offsets); \
258      else \
259        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
260          offsets, size_offsets)
261    
262    #define PCRE_FREE_STUDY(extra) \
263      if (use_pcre16) \
264        PCRE_FREE_STUDY16(extra); \
265      else \
266        PCRE_FREE_STUDY8(extra)
267    
268    #define PCRE_STUDY(extra, re, options, error) \
269      if (use_pcre16) \
270        PCRE_STUDY16(extra, re, options, error); \
271      else \
272        PCRE_STUDY8(extra, re, options, error)
273    
274    /* ----- Only 8-bit mode is supported ----- */
275    
276    #elif defined SUPPORT_PCRE8
277    #define PCHARS           PCHARS8
278    #define PCHARSV          PCHARSV8
279    #define PCRE_COMPILE     PCRE_COMPILE8
280    #define PCRE_EXEC        PCRE_EXEC8
281    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
282    #define PCRE_STUDY       PCRE_STUDY8
283    
284    /* ----- Only 16-bit mode is supported ----- */
285    
286    #else
287    #define PCHARS           PCHARS16
288    #define PCHARSV          PCHARSV16
289    #define PCRE_COMPILE     PCRE_COMPILE16
290    #define PCRE_EXEC        PCRE_EXEC16
291    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
292    #define PCRE_STUDY       PCRE_STUDY16
293    #endif
294    
295    /* ----- End of mode-specific function call macros ----- */
296    
297    
298  /* Other parameters */  /* Other parameters */
299    
# Line 160  static int debug_lengths; Line 321  static int debug_lengths;
321  static int first_callout;  static int first_callout;
322  static int locale_set = 0;  static int locale_set = 0;
323  static int show_malloc;  static int show_malloc;
324  static int use_utf8;  static int use_utf;
325  static size_t gotten_store;  static size_t gotten_store;
326    static size_t first_gotten_store = 0;
327    static const unsigned char *last_callout_mark = NULL;
328    
329  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
330    
331  static int buffer_size = 50000;  static int buffer_size = 50000;
332  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
333  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
334  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
335    
336    #ifdef SUPPORT_PCRE16
337    static int buffer16_size = 0;
338    static pcre_uint16 *buffer16 = NULL;
339    #endif
340    
341    /* If we have 8-bit support, default use_pcre16 to false; if there is also
342    16-bit support, it can be changed by an option. If there is no 8-bit support,
343    there must be 16-bit support, so default it to 1. */
344    
345  /*************************************************  #ifdef SUPPORT_PCRE8
346  *        Read or extend an input line            *  static int use_pcre16 = 0;
347  *************************************************/  #else
348    static int use_pcre16 = 1;
349    #endif
350    
351  /* Input lines are read into buffer, but both patterns and data lines can be  /* Textual explanations for runtime error codes */
 continued over multiple input lines. In addition, if the buffer fills up, we  
 want to automatically expand it so as to be able to handle extremely large  
 lines that are needed for certain stress tests. When the input buffer is  
 expanded, the other two buffers must also be expanded likewise, and the  
 contents of pbuffer, which are a copy of the input for callouts, must be  
 preserved (for when expansion happens for a data line). This is not the most  
 optimal way of handling this, but hey, this is just a test program!  
352    
353  Arguments:  static const char *errtexts[] = {
354    f            the file to read    NULL,  /* 0 is no error */
355    start        where in buffer to start (this *must* be within buffer)    NULL,  /* NOMATCH is handled specially */
356      "NULL argument passed",
357      "bad option value",
358      "magic number missing",
359      "unknown opcode - pattern overwritten?",
360      "no more memory",
361      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
362      "match limit exceeded",
363      "callout error code",
364      NULL,  /* BADUTF8 is handled specially */
365      "bad UTF-8 offset",
366      NULL,  /* PARTIAL is handled specially */
367      "not used - internal error",
368      "internal error - pattern overwritten?",
369      "bad count value",
370      "item unsupported for DFA matching",
371      "backreference condition or recursion test not supported for DFA matching",
372      "match limit not supported for DFA matching",
373      "workspace size exceeded in DFA matching",
374      "too much recursion for DFA matching",
375      "recursion limit exceeded",
376      "not used - internal error",
377      "invalid combination of newline options",
378      "bad offset value",
379      NULL,  /* SHORTUTF8 is handled specially */
380      "nested recursion at the same subject position",
381      "JIT stack limit reached",
382      "pattern compiled in wrong mode (8-bit/16-bit error)"
383    };
384    
 Returns:       pointer to the start of new data  
                could be a copy of start, or could be moved  
                NULL if no data read and EOF reached  
 */  
385    
386  static uschar *  /*************************************************
387  extend_inputline(FILE *f, uschar *start)  *         Alternate character tables             *
388  {  *************************************************/
 uschar *here = start;  
389    
390  for (;;)  /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
391    {  using the default tables of the library. However, the T option can be used to
392    int rlen = buffer_size - (here - buffer);  select alternate sets of tables, for different kinds of testing. Note also that
393    the L (locale) option also adjusts the tables. */
394    
395    /* This is the set of tables distributed as default with PCRE. It recognizes
396    only ASCII characters. */
397    
398    static const pcre_uint8 tables0[] = {
399    
400    /* This table is a lower casing table. */
401    
402        0,  1,  2,  3,  4,  5,  6,  7,
403        8,  9, 10, 11, 12, 13, 14, 15,
404       16, 17, 18, 19, 20, 21, 22, 23,
405       24, 25, 26, 27, 28, 29, 30, 31,
406       32, 33, 34, 35, 36, 37, 38, 39,
407       40, 41, 42, 43, 44, 45, 46, 47,
408       48, 49, 50, 51, 52, 53, 54, 55,
409       56, 57, 58, 59, 60, 61, 62, 63,
410       64, 97, 98, 99,100,101,102,103,
411      104,105,106,107,108,109,110,111,
412      112,113,114,115,116,117,118,119,
413      120,121,122, 91, 92, 93, 94, 95,
414       96, 97, 98, 99,100,101,102,103,
415      104,105,106,107,108,109,110,111,
416      112,113,114,115,116,117,118,119,
417      120,121,122,123,124,125,126,127,
418      128,129,130,131,132,133,134,135,
419      136,137,138,139,140,141,142,143,
420      144,145,146,147,148,149,150,151,
421      152,153,154,155,156,157,158,159,
422      160,161,162,163,164,165,166,167,
423      168,169,170,171,172,173,174,175,
424      176,177,178,179,180,181,182,183,
425      184,185,186,187,188,189,190,191,
426      192,193,194,195,196,197,198,199,
427      200,201,202,203,204,205,206,207,
428      208,209,210,211,212,213,214,215,
429      216,217,218,219,220,221,222,223,
430      224,225,226,227,228,229,230,231,
431      232,233,234,235,236,237,238,239,
432      240,241,242,243,244,245,246,247,
433      248,249,250,251,252,253,254,255,
434    
435    /* This table is a case flipping table. */
436    
437        0,  1,  2,  3,  4,  5,  6,  7,
438        8,  9, 10, 11, 12, 13, 14, 15,
439       16, 17, 18, 19, 20, 21, 22, 23,
440       24, 25, 26, 27, 28, 29, 30, 31,
441       32, 33, 34, 35, 36, 37, 38, 39,
442       40, 41, 42, 43, 44, 45, 46, 47,
443       48, 49, 50, 51, 52, 53, 54, 55,
444       56, 57, 58, 59, 60, 61, 62, 63,
445       64, 97, 98, 99,100,101,102,103,
446      104,105,106,107,108,109,110,111,
447      112,113,114,115,116,117,118,119,
448      120,121,122, 91, 92, 93, 94, 95,
449       96, 65, 66, 67, 68, 69, 70, 71,
450       72, 73, 74, 75, 76, 77, 78, 79,
451       80, 81, 82, 83, 84, 85, 86, 87,
452       88, 89, 90,123,124,125,126,127,
453      128,129,130,131,132,133,134,135,
454      136,137,138,139,140,141,142,143,
455      144,145,146,147,148,149,150,151,
456      152,153,154,155,156,157,158,159,
457      160,161,162,163,164,165,166,167,
458      168,169,170,171,172,173,174,175,
459      176,177,178,179,180,181,182,183,
460      184,185,186,187,188,189,190,191,
461      192,193,194,195,196,197,198,199,
462      200,201,202,203,204,205,206,207,
463      208,209,210,211,212,213,214,215,
464      216,217,218,219,220,221,222,223,
465      224,225,226,227,228,229,230,231,
466      232,233,234,235,236,237,238,239,
467      240,241,242,243,244,245,246,247,
468      248,249,250,251,252,253,254,255,
469    
470    /* This table contains bit maps for various character classes. Each map is 32
471    bytes long and the bits run from the least significant end of each byte. The
472    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
473    graph, print, punct, and cntrl. Other classes are built from combinations. */
474    
475      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
476      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
477      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
478      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
479    
480      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
481      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
482      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
483      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
484    
485      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
486      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
487      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
488      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
489    
490      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
491      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
492      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
493      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
494    
495      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
496      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
497      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
498      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
499    
500      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,
501      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
502      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
503      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
504    
505      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,
506      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
507      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
508      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
509    
510      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,
511      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
512      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
513      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
514    
515      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,
516      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
517      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
518      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
519    
520      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,
521      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
522      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
523      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
524    
525    /* This table identifies various classes of character by individual bits:
526      0x01   white space character
527      0x02   letter
528      0x04   decimal digit
529      0x08   hexadecimal digit
530      0x10   alphanumeric or '_'
531      0x80   regular expression metacharacter or binary zero
532    */
533    
534    if (rlen > 1000)    0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
535      {    0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
536      int dlen;    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
537      if (fgets((char *)here, rlen,  f) == NULL)    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
538        return (here == start)? NULL : start;    0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
539      dlen = (int)strlen((char *)here);    0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
540      if (dlen > 0 && here[dlen - 1] == '\n') return start;    0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
541      here += dlen;    0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
542      }    0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
543      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
544      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
545      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
546      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
547      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
548      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
549      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
550      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
551      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
552      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
553      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
554      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
555      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
556      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
557      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
558      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
559      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
560      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
561      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
562      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
563      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
564      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
565      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
566    
567    /* This is a set of tables that came originally from a Windows user. It seems to
568    be at least an approximation of ISO 8859. In particular, there are characters
569    greater than 128 that are marked as spaces, letters, etc. */
570    
571    static const pcre_uint8 tables1[] = {
572    0,1,2,3,4,5,6,7,
573    8,9,10,11,12,13,14,15,
574    16,17,18,19,20,21,22,23,
575    24,25,26,27,28,29,30,31,
576    32,33,34,35,36,37,38,39,
577    40,41,42,43,44,45,46,47,
578    48,49,50,51,52,53,54,55,
579    56,57,58,59,60,61,62,63,
580    64,97,98,99,100,101,102,103,
581    104,105,106,107,108,109,110,111,
582    112,113,114,115,116,117,118,119,
583    120,121,122,91,92,93,94,95,
584    96,97,98,99,100,101,102,103,
585    104,105,106,107,108,109,110,111,
586    112,113,114,115,116,117,118,119,
587    120,121,122,123,124,125,126,127,
588    128,129,130,131,132,133,134,135,
589    136,137,138,139,140,141,142,143,
590    144,145,146,147,148,149,150,151,
591    152,153,154,155,156,157,158,159,
592    160,161,162,163,164,165,166,167,
593    168,169,170,171,172,173,174,175,
594    176,177,178,179,180,181,182,183,
595    184,185,186,187,188,189,190,191,
596    224,225,226,227,228,229,230,231,
597    232,233,234,235,236,237,238,239,
598    240,241,242,243,244,245,246,215,
599    248,249,250,251,252,253,254,223,
600    224,225,226,227,228,229,230,231,
601    232,233,234,235,236,237,238,239,
602    240,241,242,243,244,245,246,247,
603    248,249,250,251,252,253,254,255,
604    0,1,2,3,4,5,6,7,
605    8,9,10,11,12,13,14,15,
606    16,17,18,19,20,21,22,23,
607    24,25,26,27,28,29,30,31,
608    32,33,34,35,36,37,38,39,
609    40,41,42,43,44,45,46,47,
610    48,49,50,51,52,53,54,55,
611    56,57,58,59,60,61,62,63,
612    64,97,98,99,100,101,102,103,
613    104,105,106,107,108,109,110,111,
614    112,113,114,115,116,117,118,119,
615    120,121,122,91,92,93,94,95,
616    96,65,66,67,68,69,70,71,
617    72,73,74,75,76,77,78,79,
618    80,81,82,83,84,85,86,87,
619    88,89,90,123,124,125,126,127,
620    128,129,130,131,132,133,134,135,
621    136,137,138,139,140,141,142,143,
622    144,145,146,147,148,149,150,151,
623    152,153,154,155,156,157,158,159,
624    160,161,162,163,164,165,166,167,
625    168,169,170,171,172,173,174,175,
626    176,177,178,179,180,181,182,183,
627    184,185,186,187,188,189,190,191,
628    224,225,226,227,228,229,230,231,
629    232,233,234,235,236,237,238,239,
630    240,241,242,243,244,245,246,215,
631    248,249,250,251,252,253,254,223,
632    192,193,194,195,196,197,198,199,
633    200,201,202,203,204,205,206,207,
634    208,209,210,211,212,213,214,247,
635    216,217,218,219,220,221,222,255,
636    0,62,0,0,1,0,0,0,
637    0,0,0,0,0,0,0,0,
638    32,0,0,0,1,0,0,0,
639    0,0,0,0,0,0,0,0,
640    0,0,0,0,0,0,255,3,
641    126,0,0,0,126,0,0,0,
642    0,0,0,0,0,0,0,0,
643    0,0,0,0,0,0,0,0,
644    0,0,0,0,0,0,255,3,
645    0,0,0,0,0,0,0,0,
646    0,0,0,0,0,0,12,2,
647    0,0,0,0,0,0,0,0,
648    0,0,0,0,0,0,0,0,
649    254,255,255,7,0,0,0,0,
650    0,0,0,0,0,0,0,0,
651    255,255,127,127,0,0,0,0,
652    0,0,0,0,0,0,0,0,
653    0,0,0,0,254,255,255,7,
654    0,0,0,0,0,4,32,4,
655    0,0,0,128,255,255,127,255,
656    0,0,0,0,0,0,255,3,
657    254,255,255,135,254,255,255,7,
658    0,0,0,0,0,4,44,6,
659    255,255,127,255,255,255,127,255,
660    0,0,0,0,254,255,255,255,
661    255,255,255,255,255,255,255,127,
662    0,0,0,0,254,255,255,255,
663    255,255,255,255,255,255,255,255,
664    0,2,0,0,255,255,255,255,
665    255,255,255,255,255,255,255,127,
666    0,0,0,0,255,255,255,255,
667    255,255,255,255,255,255,255,255,
668    0,0,0,0,254,255,0,252,
669    1,0,0,248,1,0,0,120,
670    0,0,0,0,254,255,255,255,
671    0,0,128,0,0,0,128,0,
672    255,255,255,255,0,0,0,0,
673    0,0,0,0,0,0,0,128,
674    255,255,255,255,0,0,0,0,
675    0,0,0,0,0,0,0,0,
676    128,0,0,0,0,0,0,0,
677    0,1,1,0,1,1,0,0,
678    0,0,0,0,0,0,0,0,
679    0,0,0,0,0,0,0,0,
680    1,0,0,0,128,0,0,0,
681    128,128,128,128,0,0,128,0,
682    28,28,28,28,28,28,28,28,
683    28,28,0,0,0,0,0,128,
684    0,26,26,26,26,26,26,18,
685    18,18,18,18,18,18,18,18,
686    18,18,18,18,18,18,18,18,
687    18,18,18,128,128,0,128,16,
688    0,26,26,26,26,26,26,18,
689    18,18,18,18,18,18,18,18,
690    18,18,18,18,18,18,18,18,
691    18,18,18,128,128,0,0,0,
692    0,0,0,0,0,1,0,0,
693    0,0,0,0,0,0,0,0,
694    0,0,0,0,0,0,0,0,
695    0,0,0,0,0,0,0,0,
696    1,0,0,0,0,0,0,0,
697    0,0,18,0,0,0,0,0,
698    0,0,20,20,0,18,0,0,
699    0,20,18,0,0,0,0,0,
700    18,18,18,18,18,18,18,18,
701    18,18,18,18,18,18,18,18,
702    18,18,18,18,18,18,18,0,
703    18,18,18,18,18,18,18,18,
704    18,18,18,18,18,18,18,18,
705    18,18,18,18,18,18,18,18,
706    18,18,18,18,18,18,18,0,
707    18,18,18,18,18,18,18,18
708    };
709    
   else  
     {  
     int new_buffer_size = 2*buffer_size;  
     uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);  
     uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);  
710    
     if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)  
       {  
       fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);  
       exit(1);  
       }  
711    
     memcpy(new_buffer, buffer, buffer_size);  
     memcpy(new_pbuffer, pbuffer, buffer_size);  
   
     buffer_size = new_buffer_size;  
712    
713      start = new_buffer + (start - buffer);  #ifndef HAVE_STRERROR
714      here = new_buffer + (here - buffer);  /*************************************************
715    *     Provide strerror() for non-ANSI libraries  *
716    *************************************************/
717    
718      free(buffer);  /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
719      free(dbuffer);  in their libraries, but can provide the same facility by this simple
720      free(pbuffer);  alternative function. */
721    
722      buffer = new_buffer;  extern int   sys_nerr;
723      dbuffer = new_dbuffer;  extern char *sys_errlist[];
     pbuffer = new_pbuffer;  
     }  
   }  
724    
725  return NULL;  /* Control never gets here */  char *
726    strerror(int n)
727    {
728    if (n < 0 || n >= sys_nerr) return "unknown error number";
729    return sys_errlist[n];
730  }  }
731    #endif /* HAVE_STRERROR */
   
   
   
   
732    
733    
734  /*************************************************  /*************************************************
735  *          Read number from string               *  *         JIT memory callback                    *
736  *************************************************/  *************************************************/
737    
738  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  static pcre_jit_stack* jit_callback(void *arg)
 around with conditional compilation, just do the job by hand. It is only used  
 for unpicking arguments, so just keep it simple.  
   
 Arguments:  
   str           string to be converted  
   endptr        where to put the end pointer  
   
 Returns:        the unsigned long  
 */  
   
 static int  
 get_value(unsigned char *str, unsigned char **endptr)  
739  {  {
740  int result = 0;  return (pcre_jit_stack *)arg;
 while(*str != 0 && isspace(*str)) str++;  
 while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');  
 *endptr = str;  
 return(result);  
741  }  }
742    
743    
   
   
744  /*************************************************  /*************************************************
745  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
746  *************************************************/  *************************************************/
# Line 299  Returns: > 0 => the number of byte Line 759  Returns: > 0 => the number of byte
759  #if !defined NOUTF8  #if !defined NOUTF8
760    
761  static int  static int
762  utf82ord(unsigned char *utf8bytes, int *vptr)  utf82ord(pcre_uint8 *utf8bytes, int *vptr)
763  {  {
764  int c = *utf8bytes++;  int c = *utf8bytes++;
765  int d = c;  int d = c;
# Line 360  Returns: number of characters placed Line 820  Returns: number of characters placed
820  #if !defined NOUTF8  #if !defined NOUTF8
821    
822  static int  static int
823  ord2utf8(int cvalue, uschar *utf8bytes)  ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
824  {  {
825  register int i, j;  register int i, j;
826  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
# Line 379  return i + 1; Line 839  return i + 1;
839    
840    
841    
842    #ifdef SUPPORT_PCRE16
843  /*************************************************  /*************************************************
844  *             Print character string             *  *         Convert a string to 16-bit             *
845  *************************************************/  *************************************************/
846    
847  /* Character string printing function. Must handle UTF-8 strings in utf8  /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
848  mode. Yields number of characters printed. If handed a NULL file, just counts  8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
849  chars without printing. */  double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
850    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
851    result is always left in buffer16.
852    
853    Arguments:
854      p          points to a byte string
855      utf        true if UTF-8 (to be converted to UTF-16)
856      len        number of bytes in the string (excluding trailing zero)
857    
858    Returns:     number of 16-bit data items used (excluding trailing zero)
859                 OR -1 if a UTF-8 string is malformed
860    */
861    
862  static int pchars(unsigned char *p, int length, FILE *f)  static int
863    to16(pcre_uint8 *p, int utf, int len)
864    {
865    pcre_uint16 *pp;
866    
867    if (buffer16_size < 2*len + 2)
868      {
869      if (buffer16 != NULL) free(buffer16);
870      buffer16_size = 2*len + 2;
871      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
872      if (buffer16 == NULL)
873        {
874        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
875        exit(1);
876        }
877      }
878    
879    pp = buffer16;
880    
881    if (!utf)
882      {
883      while (len-- > 0) *pp++ = *p++;
884      }
885    
886    else
887      {
888      int c;
889      while (len > 0)
890        {
891        int chlen = utf82ord(p, &c);
892        if (chlen <= 0) return -1;
893        p += chlen;
894        len -= chlen;
895        if (c < 0x10000) *pp++ = c; else
896          {
897          c -= 0x10000;
898          *pp++ = 0xD800 | (c >> 10);
899          *pp++ = 0xDC00 | (c & 0x3ff);
900          }
901        }
902      }
903    
904    *pp = 0;
905    return pp - buffer16;
906    }
907    #endif
908    
909    
910    /*************************************************
911    *        Read or extend an input line            *
912    *************************************************/
913    
914    /* Input lines are read into buffer, but both patterns and data lines can be
915    continued over multiple input lines. In addition, if the buffer fills up, we
916    want to automatically expand it so as to be able to handle extremely large
917    lines that are needed for certain stress tests. When the input buffer is
918    expanded, the other two buffers must also be expanded likewise, and the
919    contents of pbuffer, which are a copy of the input for callouts, must be
920    preserved (for when expansion happens for a data line). This is not the most
921    optimal way of handling this, but hey, this is just a test program!
922    
923    Arguments:
924      f            the file to read
925      start        where in buffer to start (this *must* be within buffer)
926      prompt       for stdin or readline()
927    
928    Returns:       pointer to the start of new data
929                   could be a copy of start, or could be moved
930                   NULL if no data read and EOF reached
931    */
932    
933    static pcre_uint8 *
934    extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
935    {
936    pcre_uint8 *here = start;
937    
938    for (;;)
939      {
940      int rlen = (int)(buffer_size - (here - buffer));
941    
942      if (rlen > 1000)
943        {
944        int dlen;
945    
946        /* If libreadline support is required, use readline() to read a line if the
947        input is a terminal. Note that readline() removes the trailing newline, so
948        we must put it back again, to be compatible with fgets(). */
949    
950    #ifdef SUPPORT_LIBREADLINE
951        if (isatty(fileno(f)))
952          {
953          size_t len;
954          char *s = readline(prompt);
955          if (s == NULL) return (here == start)? NULL : start;
956          len = strlen(s);
957          if (len > 0) add_history(s);
958          if (len > rlen - 1) len = rlen - 1;
959          memcpy(here, s, len);
960          here[len] = '\n';
961          here[len+1] = 0;
962          free(s);
963          }
964        else
965    #endif
966    
967        /* Read the next line by normal means, prompting if the file is stdin. */
968    
969          {
970          if (f == stdin) printf("%s", prompt);
971          if (fgets((char *)here, rlen,  f) == NULL)
972            return (here == start)? NULL : start;
973          }
974    
975        dlen = (int)strlen((char *)here);
976        if (dlen > 0 && here[dlen - 1] == '\n') return start;
977        here += dlen;
978        }
979    
980      else
981        {
982        int new_buffer_size = 2*buffer_size;
983        pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
984        pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
985        pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
986    
987        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
988          {
989          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
990          exit(1);
991          }
992    
993        memcpy(new_buffer, buffer, buffer_size);
994        memcpy(new_pbuffer, pbuffer, buffer_size);
995    
996        buffer_size = new_buffer_size;
997    
998        start = new_buffer + (start - buffer);
999        here = new_buffer + (here - buffer);
1000    
1001        free(buffer);
1002        free(dbuffer);
1003        free(pbuffer);
1004    
1005        buffer = new_buffer;
1006        dbuffer = new_dbuffer;
1007        pbuffer = new_pbuffer;
1008        }
1009      }
1010    
1011    return NULL;  /* Control never gets here */
1012    }
1013    
1014    
1015    
1016    /*************************************************
1017    *          Read number from string               *
1018    *************************************************/
1019    
1020    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
1021    around with conditional compilation, just do the job by hand. It is only used
1022    for unpicking arguments, so just keep it simple.
1023    
1024    Arguments:
1025      str           string to be converted
1026      endptr        where to put the end pointer
1027    
1028    Returns:        the value, as an int (no overflow check is made)
1029    */
1030    
1031    static int
1032    get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1033    {
1034    int result = 0;
1035    while(*str != 0 && isspace(*str)) str++;
1036    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
1037    *endptr = str;
1038    return(result);
1039    }
1040    
1041    
1042    
1043    /*************************************************
1044    *             Print one character                *
1045    *************************************************/
1046    
1047    /* Print a single character either literally, or as a hex escape. */
1048    
1049    static int pchar(int c, FILE *f)
1050    {
1051    if (PRINTOK(c))
1052      {
1053      if (f != NULL) fprintf(f, "%c", c);
1054      return 1;
1055      }
1056    
1057    if (c < 0x100)
1058      {
1059      if (use_utf)
1060        {
1061        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1062        return 6;
1063        }
1064      else
1065        {
1066        if (f != NULL) fprintf(f, "\\x%02x", c);
1067        return 4;
1068        }
1069      }
1070    
1071    if (f != NULL) fprintf(f, "\\x{%02x}", c);
1072    return (c <= 0x000000ff)? 6 :
1073           (c <= 0x00000fff)? 7 :
1074           (c <= 0x0000ffff)? 8 :
1075           (c <= 0x000fffff)? 9 : 10;
1076    }
1077    
1078    
1079    
1080    #ifdef SUPPORT_PCRE8
1081    /*************************************************
1082    *         Print 8-bit character string           *
1083    *************************************************/
1084    
1085    /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1086    If handed a NULL file, just counts chars without printing. */
1087    
1088    static int pchars(pcre_uint8 *p, int length, FILE *f)
1089  {  {
1090  int c = 0;  int c = 0;
1091  int yield = 0;  int yield = 0;
# Line 395  int yield = 0; Line 1093  int yield = 0;
1093  while (length-- > 0)  while (length-- > 0)
1094    {    {
1095  #if !defined NOUTF8  #if !defined NOUTF8
1096    if (use_utf8)    if (use_utf)
1097      {      {
1098      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1099      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1100        {        {
1101        length -= rc - 1;        length -= rc - 1;
1102        p += rc;        p += rc;
1103        if (PRINTHEX(c))        yield += pchar(c, f);
1104          {        continue;
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1105        }        }
1106      }      }
1107  #endif  #endif
1108      c = *p++;
1109      yield += pchar(c, f);
1110      }
1111    
1112     /* Not UTF-8, or malformed UTF-8  */  return yield;
1113    }
1114    #endif
1115    
1116    c = *p++;  
1117    if (PRINTHEX(c))  
1118      {  #ifdef SUPPORT_PCRE16
1119      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1120      yield++;  *           Print 16-bit character string        *
1121      }  *************************************************/
1122    else  
1123    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1124    If handed a NULL file, just counts chars without printing. */
1125    
1126    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1127    {
1128    int yield = 0;
1129    
1130    while (length-- > 0)
1131      {
1132      int c = *p++ & 0xffff;
1133    #if !defined NOUTF8
1134      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1135      {      {
1136      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1137      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1138      }        {
1139          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1140          length--;
1141          p++;
1142          }
1143        }
1144    #endif
1145      yield += pchar(c, f);
1146    }    }
1147    
1148  return yield;  return yield;
1149  }  }
1150    #endif
1151    
1152    
1153    
# Line 467  if (callout_extra) Line 1176  if (callout_extra)
1176      else      else
1177        {        {
1178        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1179        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject + cb->offset_vector[i],
1180          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1181        fprintf(f, "\n");        fprintf(f, "\n");
1182        }        }
# Line 480  printed lengths of the substrings. */ Line 1189  printed lengths of the substrings. */
1189    
1190  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1191    
1192  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, cb->start_match, f);
1193  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject + cb->start_match,
1194    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1195    
1196  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1197    
1198  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject + cb->current_position,
1199    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1200    
1201  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 523  fprintf(outfile, "%.*s", (cb->next_item_ Line 1232  fprintf(outfile, "%.*s", (cb->next_item_
1232  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1233  first_callout = 0;  first_callout = 0;
1234    
1235    if (cb->mark != last_callout_mark)
1236      {
1237      fprintf(outfile, "Latest Mark: %s\n",
1238        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1239      last_callout_mark = cb->mark;
1240      }
1241    
1242  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1243    {    {
1244    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 542  return (cb->callout_number != callout_fa Line 1258  return (cb->callout_number != callout_fa
1258  *            Local malloc functions              *  *            Local malloc functions              *
1259  *************************************************/  *************************************************/
1260    
1261  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1262  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1263    show_malloc variable is set only during matching. */
1264    
1265  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1266  {  {
1267  void *block = malloc(size);  void *block = malloc(size);
1268  gotten_store = size;  gotten_store = size;
1269    if (first_gotten_store == 0) first_gotten_store = size;
1270  if (show_malloc)  if (show_malloc)
1271    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1272  return block;  return block;
# Line 561  if (show_malloc) Line 1279  if (show_malloc)
1279  free(block);  free(block);
1280  }  }
1281    
   
1282  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1283    
1284  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 584  free(block); Line 1301  free(block);
1301  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1302  *************************************************/  *************************************************/
1303    
1304  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1305    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1306    value, but the code is defensive. */
1307    
1308  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1309  {  {
1310  int rc;  int rc;
1311  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1312    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1313    #ifdef SUPPORT_PCRE16
1314      rc = pcre16_fullinfo(re, study, option, ptr);
1315    #else
1316      rc = PCRE_ERROR_BADMODE;
1317    #endif
1318    else
1319    #ifdef SUPPORT_PCRE8
1320      rc = pcre_fullinfo(re, study, option, ptr);
1321    #else
1322      rc = PCRE_ERROR_BADMODE;
1323    #endif
1324    
1325    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1326      use_pcre16? "16" : "", option);
1327  }  }
1328    
1329    
# Line 617  return ((value & 0x000000ff) << 24) | Line 1350  return ((value & 0x000000ff) << 24) |
1350  *************************************************/  *************************************************/
1351    
1352  static int  static int
1353  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1354    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1355    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1356  {  {
# Line 632  for (;;) Line 1365  for (;;)
1365    {    {
1366    *limit = mid;    *limit = mid;
1367    
1368    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1369      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
1370    
1371    if (count == errnumber)    if (count == errnumber)
# Line 677  Returns: < 0, = 0, or > 0, according Line 1410  Returns: < 0, = 0, or > 0, according
1410  */  */
1411    
1412  static int  static int
1413  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1414  {  {
1415  while (n--)  while (n--)
1416    {    {
# Line 693  return 0; Line 1426  return 0;
1426  *         Check newline indicator                *  *         Check newline indicator                *
1427  *************************************************/  *************************************************/
1428    
1429  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
1430  xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is  a message and return 0 if there is no match.
 no match.  
1431    
1432  Arguments:  Arguments:
1433    p           points after the leading '<'    p           points after the leading '<'
# Line 705  Returns: appropriate PCRE_NEWLINE_x Line 1437  Returns: appropriate PCRE_NEWLINE_x
1437  */  */
1438    
1439  static int  static int
1440  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
1441  {  {
1442  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1443  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1444  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1445  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1446  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1447    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1448    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1449  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
1450  return 0;  return 0;
1451  }  }
# Line 725  return 0; Line 1459  return 0;
1459  static void  static void
1460  usage(void)  usage(void)
1461  {  {
1462  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
1463    printf("Input and output default to stdin and stdout.\n");
1464    #ifdef SUPPORT_LIBREADLINE
1465    printf("If input is a terminal, readline() is used to read from it.\n");
1466    #else
1467    printf("This version of pcretest is not linked with readline().\n");
1468    #endif
1469    printf("\nOptions:\n");
1470    #ifdef SUPPORT_PCRE16
1471    printf("  -16      use 16-bit interface\n");
1472    #endif
1473  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
1474  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
1475  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 734  printf(" -dfa force DFA matching fo Line 1478  printf(" -dfa force DFA matching fo
1478  #endif  #endif
1479  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
1480  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
1481           "  -M       find MATCH_LIMIT minimum for each subject\n"
1482         "  -m       output memory used information\n"         "  -m       output memory used information\n"
1483         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
1484  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 741  printf(" -p use POSIX interface\n Line 1486  printf(" -p use POSIX interface\n
1486  #endif  #endif
1487  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
1488  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
1489  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
1490           "  -s+      force each pattern to be studied, using JIT if available\n"
1491         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
1492  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1493  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 763  int main(int argc, char **argv) Line 1509  int main(int argc, char **argv)
1509  FILE *infile = stdin;  FILE *infile = stdin;
1510  int options = 0;  int options = 0;
1511  int study_options = 0;  int study_options = 0;
1512    int default_find_match_limit = FALSE;
1513  int op = 1;  int op = 1;
1514  int timeit = 0;  int timeit = 0;
1515  int timeitm = 0;  int timeitm = 0;
1516  int showinfo = 0;  int showinfo = 0;
1517  int showstore = 0;  int showstore = 0;
1518    int force_study = -1;
1519    int force_study_options = 0;
1520  int quiet = 0;  int quiet = 0;
1521  int size_offsets = 45;  int size_offsets = 45;
1522  int size_offsets_max;  int size_offsets_max;
# Line 781  int all_use_dfa = 0; Line 1530  int all_use_dfa = 0;
1530  int yield = 0;  int yield = 0;
1531  int stack_size;  int stack_size;
1532    
1533    pcre_jit_stack *jit_stack = NULL;
1534    
1535  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1536  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1537    
1538  uschar copynames[1024];  pcre_uchar copynames[1024];
1539  uschar getnames[1024];  pcre_uchar getnames[1024];
   
 uschar *copynamesptr;  
 uschar *getnamesptr;  
1540    
1541  /* Get buffers from malloc() so that Electric Fence will check their misuse  pcre_uchar *copynamesptr;
1542  when I am debugging. They grow automatically when very long lines are read. */  pcre_uchar *getnamesptr;
1543    
1544  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
1545  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
1546  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
1547    
1548    buffer = (pcre_uint8 *)malloc(buffer_size);
1549    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1550    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1551    
1552  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
1553    
# Line 814  _setmode( _fileno( stdout ), _O_BINARY ) Line 1566  _setmode( _fileno( stdout ), _O_BINARY )
1566    
1567  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1568    {    {
1569    unsigned char *endptr;    pcre_uint8 *endptr;
1570    
1571      if (strcmp(argv[op], "-m") == 0) showstore = 1;
1572      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1573      else if (strcmp(argv[op], "-s+") == 0)
1574        {
1575        force_study = 1;
1576        force_study_options = PCRE_STUDY_JIT_COMPILE;
1577        }
1578    #ifdef SUPPORT_PCRE16
1579      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1580    #endif
1581    
   if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)  
     showstore = 1;  
1582    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1583    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1584    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
1585    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
1586      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
1587  #if !defined NODFA  #if !defined NODFA
1588    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1589  #endif  #endif
1590    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1591        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1592          *endptr == 0))          *endptr == 0))
1593      {      {
1594      op++;      op++;
# Line 836  while (argc > 1 && argv[op][0] == '-') Line 1598  while (argc > 1 && argv[op][0] == '-')
1598      {      {
1599      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
1600      int temp;      int temp;
1601      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1602                       *endptr == 0))                       *endptr == 0))
1603        {        {
1604        timeitm = temp;        timeitm = temp;
# Line 847  while (argc > 1 && argv[op][0] == '-') Line 1609  while (argc > 1 && argv[op][0] == '-')
1609      if (both) timeit = timeitm;      if (both) timeit = timeitm;
1610      }      }
1611    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1612        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1613          *endptr == 0))          *endptr == 0))
1614      {      {
1615  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1616      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
1617      exit(1);      exit(1);
1618  #else  #else
# Line 874  while (argc > 1 && argv[op][0] == '-') Line 1636  while (argc > 1 && argv[op][0] == '-')
1636    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
1637      {      {
1638      int rc;      int rc;
1639        unsigned long int lrc;
1640      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1641      printf("Compiled with\n");      printf("Compiled with\n");
1642    
1643    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1644    are set, either both UTFs are supported or both are not supported. */
1645    
1646    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1647        printf("  8-bit and 16-bit support\n");
1648        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1649        if (rc)
1650          printf("  UTF-8 and UTF-16 support\n");
1651        else
1652          printf("  No UTF-8 or UTF-16 support\n");
1653    #elif defined SUPPORT_PCRE8
1654        printf("  8-bit support only\n");
1655      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1656      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1657    #else
1658        printf("  16-bit support only\n");
1659        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1660        printf("  %sUTF-16 support\n", rc? "" : "No ");
1661    #endif
1662    
1663      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1664      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1665        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1666        if (rc)
1667          printf("  Just-in-time compiler support\n");
1668        else
1669          printf("  No just-in-time compiler support\n");
1670      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1671      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      /* Note that these values are always the ASCII values, even
1672        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :      in EBCDIC environments. CR is 13 and NL is 10. */
1673        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
1674          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
1675        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
1676        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
1677        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
1678        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
1679                                         "all Unicode newlines");
1680      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
1681      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
1682      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
1683      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
1684      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
1685      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
1686      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
1687      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
1688      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
1689      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
1690      goto EXIT;      goto EXIT;
# Line 952  if (argc > 2) Line 1744  if (argc > 2)
1744    
1745  /* Set alternative malloc function */  /* Set alternative malloc function */
1746    
1747    #ifdef SUPPORT_PCRE8
1748  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1749  pcre_free = new_free;  pcre_free = new_free;
1750  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
1751  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
1752    #endif
1753    
1754    #ifdef SUPPORT_PCRE16
1755    pcre16_malloc = new_malloc;
1756    pcre16_free = new_free;
1757    pcre16_stack_malloc = stack_malloc;
1758    pcre16_stack_free = stack_free;
1759    #endif
1760    
1761  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1762    
# Line 974  while (!done) Line 1775  while (!done)
1775  #endif  #endif
1776    
1777    const char *error;    const char *error;
1778    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
1779    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
1780    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
1781      const pcre_uint8 *tables = NULL;
1782    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1783    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1784      int do_allcaps = 0;
1785      int do_mark = 0;
1786    int do_study = 0;    int do_study = 0;
1787      int no_force_study = 0;
1788    int do_debug = debug;    int do_debug = debug;
1789    int do_G = 0;    int do_G = 0;
1790    int do_g = 0;    int do_g = 0;
1791    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1792    int do_showrest = 0;    int do_showrest = 0;
1793      int do_showcaprest = 0;
1794    int do_flip = 0;    int do_flip = 0;
1795    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1796    
1797    use_utf8 = 0;    use_utf = 0;
1798    debug_lengths = 1;    debug_lengths = 1;
1799    
1800    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
1801    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1802    fflush(outfile);    fflush(outfile);
1803    
# Line 1005  while (!done) Line 1810  while (!done)
1810    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1811      {      {
1812      unsigned long int magic, get_options;      unsigned long int magic, get_options;
1813      uschar sbuf[8];      pcre_uint8 sbuf[8];
1814      FILE *f;      FILE *f;
1815    
1816      p++;      p++;
# Line 1028  while (!done) Line 1833  while (!done)
1833        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1834    
1835      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
1836      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
1837    
1838      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1839    
# Line 1047  while (!done) Line 1852  while (!done)
1852          }          }
1853        }        }
1854    
1855      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1856        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1857    
1858      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1859    
1860      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1861      use_utf8 = (get_options & PCRE_UTF8) != 0;      use_utf = (get_options & PCRE_UTF8) != 0;
1862    
1863      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
1864    
1865      if (true_study_size != 0)      if (true_study_size != 0)
1866        {        {
# Line 1071  while (!done) Line 1876  while (!done)
1876          {          {
1877          FAIL_READ:          FAIL_READ:
1878          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1879          if (extra != NULL) new_free(extra);          if (extra != NULL)
1880              {
1881              PCRE_FREE_STUDY(extra);
1882              }
1883          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1884          fclose(f);          fclose(f);
1885          continue;          continue;
# Line 1092  while (!done) Line 1900  while (!done)
1900    
1901    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1902      {      {
1903      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1904      goto SKIP_DATA;      goto SKIP_DATA;
1905      }      }
1906    
1907    pp = p;    pp = p;
1908    poffset = p - buffer;    poffset = (int)(p - buffer);
1909    
1910    for(;;)    for(;;)
1911      {      {
# Line 1108  while (!done) Line 1916  while (!done)
1916        pp++;        pp++;
1917        }        }
1918      if (*pp != 0) break;      if (*pp != 0) break;
1919      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
1920        {        {
1921        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1922        done = 1;        done = 1;
# Line 1152  while (!done) Line 1959  while (!done)
1959        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1960        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1961    
1962        case '+': do_showrest = 1; break;        case '+':
1963          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
1964          break;
1965    
1966          case '=': do_allcaps = 1; break;
1967        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1968        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
1969        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1162  while (!done) Line 1973  while (!done)
1973        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1974        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1975        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
1976          case 'K': do_mark = 1; break;
1977        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1978        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1979    
# Line 1169  while (!done) Line 1981  while (!done)
1981        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1982  #endif  #endif
1983    
1984        case 'S': do_study = 1; break;        case 'S':
1985          if (do_study == 0)
1986            {
1987            do_study = 1;
1988            if (*pp == '+')
1989              {
1990              study_options |= PCRE_STUDY_JIT_COMPILE;
1991              pp++;
1992              }
1993            }
1994          else
1995            {
1996            do_study = 0;
1997            no_force_study = 1;
1998            }
1999          break;
2000    
2001        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2002          case 'W': options |= PCRE_UCP; break;
2003        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2004          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2005        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2006        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2007        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2008    
2009          case 'T':
2010          switch (*pp++)
2011            {
2012            case '0': tables = tables0; break;
2013            case '1': tables = tables1; break;
2014    
2015            case '\r':
2016            case '\n':
2017            case ' ':
2018            case 0:
2019            fprintf(outfile, "** Missing table number after /T\n");
2020            goto SKIP_DATA;
2021    
2022            default:
2023            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2024            goto SKIP_DATA;
2025            }
2026          break;
2027    
2028        case 'L':        case 'L':
2029        ppp = pp;        ppp = pp;
2030        /* The '\r' test here is so that it works on Windows. */        /* The '\r' test here is so that it works on Windows. */
# Line 1201  while (!done) Line 2050  while (!done)
2050    
2051        case '<':        case '<':
2052          {          {
2053          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2054          if (x == 0) goto SKIP_DATA;            {
2055          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
2056          while (*pp++ != '>');            pp += 3;
2057              }
2058            else
2059              {
2060              int x = check_newline(pp, outfile);
2061              if (x == 0) goto SKIP_DATA;
2062              options |= x;
2063              while (*pp++ != '>');
2064              }
2065          }          }
2066        break;        break;
2067    
# Line 1221  while (!done) Line 2078  while (!done)
2078    
2079    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2080    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2081    local character tables. */    local character tables. Neither does it have 16-bit support. */
2082    
2083  #if !defined NOPOSIX  #if !defined NOPOSIX
2084    if (posix || do_posix)    if (posix || do_posix)
# Line 1234  while (!done) Line 2091  while (!done)
2091      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2092      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2093      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2094        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2095        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2096    
2097        first_gotten_store = 0;
2098      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2099    
2100      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1254  while (!done) Line 2114  while (!done)
2114  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2115    
2116      {      {
2117        unsigned long int get_options;
2118    
2119        /* In 16-bit mode, convert the input. */
2120    
2121    #ifdef SUPPORT_PCRE16
2122        if (use_pcre16)
2123          {
2124          if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2125            {
2126            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2127              "converted to UTF-16\n");
2128            goto SKIP_DATA;
2129            }
2130          p = (pcre_uint8 *)buffer16;
2131          }
2132    #endif
2133    
2134        /* Compile many times when timing */
2135    
2136      if (timeit > 0)      if (timeit > 0)
2137        {        {
2138        register int i;        register int i;
# Line 1261  while (!done) Line 2140  while (!done)
2140        clock_t start_time = clock();        clock_t start_time = clock();
2141        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2142          {          {
2143          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2144          if (re != NULL) free(re);          if (re != NULL) free(re);
2145          }          }
2146        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1270  while (!done) Line 2149  while (!done)
2149            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2150        }        }
2151    
2152      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2153        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2154    
2155      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2156      if non-interactive. */      if non-interactive. */
# Line 1283  while (!done) Line 2163  while (!done)
2163          {          {
2164          for (;;)          for (;;)
2165            {            {
2166            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2167              {              {
2168              done = 1;              done = 1;
2169              goto CONTINUE;              goto CONTINUE;
# Line 1297  while (!done) Line 2177  while (!done)
2177        goto CONTINUE;        goto CONTINUE;
2178        }        }
2179    
2180      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
2181      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
2182      returns only limited data. Check that it agrees with the newer one. */      lines. */
2183    
2184      if (log_store)      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2185        fprintf(outfile, "Memory allocation (code space): %d\n",      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2186    
2187      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
2188      and remember the store that was got. */      and remember the store that was got. */
2189    
2190      true_size = ((real_pcre *)re)->size;      true_size = ((real_pcre *)re)->size;
2191      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2192    
2193        /* Output code size information if requested */
2194    
2195        if (log_store)
2196          fprintf(outfile, "Memory allocation (code space): %d\n",
2197            (int)(first_gotten_store -
2198                  sizeof(real_pcre) -
2199                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2200    
2201      /* If /S was present, study the regexp to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
2202      help with the matching. */      help with the matching, unless the pattern has the SS option, which
2203        suppresses the effect of /S (used for a few test patterns where studying is
2204        never sensible). */
2205    
2206      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
2207        {        {
2208        if (timeit > 0)        if (timeit > 0)
2209          {          {
# Line 1324  while (!done) Line 2211  while (!done)
2211          clock_t time_taken;          clock_t time_taken;
2212          clock_t start_time = clock();          clock_t start_time = clock();
2213          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2214            extra = pcre_study(re, study_options, &error);            {
2215              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2216              }
2217          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2218          if (extra != NULL) free(extra);          if (extra != NULL)
2219              {
2220              PCRE_FREE_STUDY(extra);
2221              }
2222          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2223            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2224              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2225          }          }
2226        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2227        if (error != NULL)        if (error != NULL)
2228          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2229        else if (extra != NULL)        else if (extra != NULL)
2230            {
2231          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2232            if (log_store)
2233              {
2234              size_t jitsize;
2235              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2236              if (jitsize != 0)
2237                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2238              }
2239            }
2240          }
2241    
2242        /* If /K was present, we set up for handling MARK data. */
2243    
2244        if (do_mark)
2245          {
2246          if (extra == NULL)
2247            {
2248            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2249            extra->flags = 0;
2250            }
2251          extra->mark = &markptr;
2252          extra->flags |= PCRE_EXTRA_MARK;
2253        }        }
2254    
2255      /* If the 'F' option was present, we flip the bytes of all the integer      /* If the 'F' option was present, we flip the bytes of all the integer
# Line 1346  while (!done) Line 2260  while (!done)
2260      if (do_flip)      if (do_flip)
2261        {        {
2262        real_pcre *rre = (real_pcre *)re;        real_pcre *rre = (real_pcre *)re;
2263        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number =
2264            byteflip(rre->magic_number, sizeof(rre->magic_number));
2265        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
2266        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
2267        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2268        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_bracket =
2269        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));          (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2270        rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));        rre->top_backref =
2271        rre->name_table_offset = byteflip(rre->name_table_offset,          (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2272          rre->first_char =
2273            (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2274          rre->req_char =
2275            (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2276          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2277          sizeof(rre->name_table_offset));          sizeof(rre->name_table_offset));
2278        rre->name_entry_size = byteflip(rre->name_entry_size,        rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2279          sizeof(rre->name_entry_size));          sizeof(rre->name_entry_size));
2280        rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));        rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2281            sizeof(rre->name_count));
2282    
2283        if (extra != NULL)        if (extra != NULL)
2284          {          {
2285          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2286          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2287          rsd->options = byteflip(rsd->options, sizeof(rsd->options));          rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2288            rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2289          }          }
2290        }        }
2291    
2292      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
2293    
2294      SHOW_INFO:      SHOW_INFO:
2295    
2296      if (do_debug)      if (do_debug)
2297        {        {
2298        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
2299    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2300          if (use_pcre16)
2301            pcre16_printint(re, outfile, debug_lengths);
2302          else
2303            pcre_printint(re, outfile, debug_lengths);
2304    #elif defined SUPPORT_PCRE8
2305        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
2306    #else
2307          pcre16_printint(re, outfile, debug_lengths);
2308    #endif
2309        }        }
2310    
2311        /* We already have the options in get_options (see above) */
2312    
2313      if (do_showinfo)      if (do_showinfo)
2314        {        {
2315        unsigned long int get_options, all_options;        unsigned long int all_options;
2316  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2317        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
2318  #endif  #endif
2319        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
2320          hascrorlf;          hascrorlf;
2321        int nameentrysize, namecount;        int nameentrysize, namecount;
2322        const uschar *nametable;        const pcre_uchar *nametable;
2323    
       new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
2324        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2325        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
2326        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
# Line 1401  while (!done) Line 2333  while (!done)
2333        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2334        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2335    
2336          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2337          that it gives the same results as the new function. */
2338    
2339  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2340        old_count = pcre_info(re, &old_options, &old_first_char);        if (!use_pcre16)
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
2341          {          {
2342          if (old_count != count) fprintf(outfile,          old_count = pcre_info(re, &old_options, &old_first_char);
2343            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,          if (count < 0) fprintf(outfile,
2344              old_count);            "Error %d from pcre_info()\n", count);
2345            else
2346          if (old_first_char != first_char) fprintf(outfile,            {
2347            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            if (old_count != count) fprintf(outfile,
2348              first_char, old_first_char);              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2349                  old_count);
2350          if (old_options != (int)get_options) fprintf(outfile,  
2351            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            if (old_first_char != first_char) fprintf(outfile,
2352              get_options, old_options);              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2353                  first_char, old_first_char);
2354    
2355              if (old_options != (int)get_options) fprintf(outfile,
2356                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2357                  get_options, old_options);
2358              }
2359          }          }
2360  #endif  #endif
2361    
# Line 1448  while (!done) Line 2386  while (!done)
2386        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
2387    
2388        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2389          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2390            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2391            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2392            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
2393            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
2394            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
2395            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
2396              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
2397              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
2398            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
2399            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2400            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2401            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2402            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2403            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2404              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2405              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2406            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2407    
2408        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1501  while (!done) Line 2443  while (!done)
2443          }          }
2444        else        else
2445          {          {
2446          int ch = first_char & 255;          const char *caseless =
2447          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2448            "" : " (caseless)";            "" : " (caseless)";
2449          if (PRINTHEX(ch))  
2450            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
2451              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2452          else          else
2453            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
2454              fprintf(outfile, "First char = ");
2455              pchar(first_char, outfile);
2456              fprintf(outfile, "%s\n", caseless);
2457              }
2458          }          }
2459    
2460        if (need_char < 0)        if (need_char < 0)
# Line 1516  while (!done) Line 2463  while (!done)
2463          }          }
2464        else        else
2465          {          {
2466          int ch = need_char & 255;          const char *caseless =
2467          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2468            "" : " (caseless)";            "" : " (caseless)";
2469          if (PRINTHEX(ch))  
2470            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
2471              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2472          else          else
2473            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
2474          }          }
2475    
2476        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2477        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2478        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2479        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2480          information unless -i or -d was also present. This means that, except
2481          when auto-callouts are involved, the output from runs with and without
2482          -s should be identical. */
2483    
2484        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2485          {          {
2486          if (extra == NULL)          if (extra == NULL)
2487            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2488          else          else
2489            {            {
2490            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2491            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
2492    
2493              new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
2494              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
2495    
2496              new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
2497            if (start_bits == NULL)            if (start_bits == NULL)
2498              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "No set of starting bytes\n");
2499            else            else
2500              {              {
2501              int i;              int i;
# Line 1555  while (!done) Line 2510  while (!done)
2510                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2511                    c = 2;                    c = 2;
2512                    }                    }
2513                  if (PRINTHEX(i) && i != ' ')                  if (PRINTOK(i) && i != ' ')
2514                    {                    {
2515                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2516                    c += 2;                    c += 2;
# Line 1570  while (!done) Line 2525  while (!done)
2525              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2526              }              }
2527            }            }
2528    
2529            /* Show this only if the JIT was set by /S, not by -s. */
2530    
2531            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2532              {
2533              int jit;
2534              new_info(re, extra, PCRE_INFO_JIT, &jit);
2535              if (jit)
2536                fprintf(outfile, "JIT study was successful\n");
2537              else
2538    #ifdef SUPPORT_JIT
2539                fprintf(outfile, "JIT study was not successful\n");
2540    #else
2541                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2542    #endif
2543              }
2544          }          }
2545        }        }
2546    
# Line 1586  while (!done) Line 2557  while (!done)
2557          }          }
2558        else        else
2559          {          {
2560          uschar sbuf[8];          pcre_uint8 sbuf[8];
2561          sbuf[0] = (true_size >> 24)  & 255;          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2562          sbuf[1] = (true_size >> 16)  & 255;          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2563          sbuf[2] = (true_size >>  8)  & 255;          sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2564          sbuf[3] = (true_size)  & 255;          sbuf[3] = (pcre_uint8)((true_size) & 255);
2565    
2566          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2567          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2568          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2569          sbuf[7] = (true_study_size)  & 255;          sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2570    
2571          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2572              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1604  while (!done) Line 2575  while (!done)
2575            }            }
2576          else          else
2577            {            {
2578            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2579    
2580              /* If there is study data, write it. */
2581    
2582            if (extra != NULL)            if (extra != NULL)
2583              {              {
2584              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1614  while (!done) Line 2588  while (!done)
2588                  strerror(errno));                  strerror(errno));
2589                }                }
2590              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
2591              }              }
2592            }            }
2593          fclose(f);          fclose(f);
2594          }          }
2595    
2596        new_free(re);        new_free(re);
2597        if (extra != NULL) new_free(extra);        if (extra != NULL)
2598        if (tables != NULL) new_free((void *)tables);          {
2599            PCRE_FREE_STUDY(extra);
2600            }
2601          if (locale_set)
2602            {
2603            new_free((void *)tables);
2604            setlocale(LC_CTYPE, "C");
2605            locale_set = 0;
2606            }
2607        continue;  /* With next regex */        continue;  /* With next regex */
2608        }        }
2609      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1631  while (!done) Line 2612  while (!done)
2612    
2613    for (;;)    for (;;)
2614      {      {
2615      uschar *q;      pcre_uint8 *q;
2616      uschar *bptr;      pcre_uint8 *bptr;
2617      int *use_offsets = offsets;      int *use_offsets = offsets;
2618      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2619      int callout_data = 0;      int callout_data = 0;
2620      int callout_data_set = 0;      int callout_data_set = 0;
2621      int count, c;      int count, c;
2622      int copystrings = 0;      int copystrings = 0;
2623      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
2624      int getstrings = 0;      int getstrings = 0;
2625      int getlist = 0;      int getlist = 0;
2626      int gmatched = 0;      int gmatched = 0;
2627      int start_offset = 0;      int start_offset = 0;
2628        int start_offset_sign = 1;
2629      int g_notempty = 0;      int g_notempty = 0;
2630      int use_dfa = 0;      int use_dfa = 0;
2631    
# Line 1657  while (!done) Line 2639  while (!done)
2639    
2640      pcre_callout = callout;      pcre_callout = callout;
2641      first_callout = 1;      first_callout = 1;
2642        last_callout_mark = NULL;
2643      callout_extra = 0;      callout_extra = 0;
2644      callout_count = 0;      callout_count = 0;
2645      callout_fail_count = 999999;      callout_fail_count = 999999;
# Line 1669  while (!done) Line 2652  while (!done)
2652      len = 0;      len = 0;
2653      for (;;)      for (;;)
2654        {        {
2655        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
2656          {          {
2657          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
2658              {
2659              fprintf(outfile, "\n");
2660              break;
2661              }
2662          done = 1;          done = 1;
2663          goto CONTINUE;          goto CONTINUE;
2664          }          }
# Line 1712  while (!done) Line 2698  while (!done)
2698            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2699    
2700  #if !defined NOUTF8  #if !defined NOUTF8
2701          if (use_utf8 && c > 255)          if (use_utf && c > 255)
2702            {            {
2703            unsigned char buff8[8];            pcre_uint8 buff8[8];
2704            int ii, utn;            int ii, utn;
2705            utn = ord2utf8(c, buff8);            utn = ord2utf8(c, buff8);
2706            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
# Line 1730  while (!done) Line 2716  while (!done)
2716  #if !defined NOUTF8  #if !defined NOUTF8
2717          if (*p == '{')          if (*p == '{')
2718            {            {
2719            unsigned char *pt = p;            pcre_uint8 *pt = p;
2720            c = 0;            c = 0;
2721            while (isxdigit(*(++pt)))  
2722              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2723              when isxdigit() is a macro that refers to its argument more than
2724              once. This is banned by the C Standard, but apparently happens in at
2725              least one MacOS environment. */
2726    
2727              for (pt++; isxdigit(*pt); pt++)
2728                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2729            if (*pt == '}')            if (*pt == '}')
2730              {              {
2731              unsigned char buff8[8];              pcre_uint8 buff8[8];
2732              int ii, utn;              int ii, utn;
2733              utn = ord2utf8(c, buff8);              if (use_utf)
2734              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                {
2735              c = buff8[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
2736                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
2737                  c = buff8[ii];   /* Last byte */
2738                  }
2739                else
2740                 {
2741                 if (c > 255)
2742                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
2743                     "UTF-8 mode is not enabled.\n"
2744                     "** Truncation will probably give the wrong result.\n", c);
2745                 }
2746              p = pt + 1;              p = pt + 1;
2747              break;              break;
2748              }              }
# Line 1753  while (!done) Line 2755  while (!done)
2755          c = 0;          c = 0;
2756          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
2757            {            {
2758            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
2759            p++;            p++;
2760            }            }
2761          break;          break;
# Line 1763  while (!done) Line 2765  while (!done)
2765          continue;          continue;
2766    
2767          case '>':          case '>':
2768            if (*p == '-')
2769              {
2770              start_offset_sign = -1;
2771              p++;
2772              }
2773          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
2774            start_offset *= start_offset_sign;
2775          continue;          continue;
2776    
2777          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1782  while (!done) Line 2790  while (!done)
2790            }            }
2791          else if (isalnum(*p))          else if (isalnum(*p))
2792            {            {
2793            uschar *npp = copynamesptr;            pcre_uchar *npp = copynamesptr;
2794            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2795            *npp++ = 0;            *npp++ = 0;
2796            *npp = 0;            *npp = 0;
# Line 1836  while (!done) Line 2844  while (!done)
2844  #endif  #endif
2845            use_dfa = 1;            use_dfa = 1;
2846          continue;          continue;
2847    #endif
2848    
2849    #if !defined NODFA
2850          case 'F':          case 'F':
2851          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
2852          continue;          continue;
# Line 1850  while (!done) Line 2860  while (!done)
2860            }            }
2861          else if (isalnum(*p))          else if (isalnum(*p))
2862            {            {
2863            uschar *npp = getnamesptr;            pcre_uchar *npp = getnamesptr;
2864            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
2865            *npp++ = 0;            *npp++ = 0;
2866            *npp = 0;            *npp = 0;
# Line 1861  while (!done) Line 2871  while (!done)
2871            }            }
2872          continue;          continue;
2873    
2874            case 'J':
2875            while(isdigit(*p)) n = n * 10 + *p++ - '0';
2876            if (extra != NULL
2877                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
2878                && extra->executable_jit != NULL)
2879              {
2880              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
2881              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
2882              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
2883              }
2884            continue;
2885    
2886          case 'L':          case 'L':
2887          getlist = 1;          getlist = 1;
2888          continue;          continue;
# Line 1870  while (!done) Line 2892  while (!done)
2892          continue;          continue;
2893    
2894          case 'N':          case 'N':
2895          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
2896              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2897            else
2898              options |= PCRE_NOTEMPTY;
2899          continue;          continue;
2900    
2901          case 'O':          case 'O':
# Line 1893  while (!done) Line 2918  while (!done)
2918          continue;          continue;
2919    
2920          case 'P':          case 'P':
2921          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2922              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2923          continue;          continue;
2924    
2925          case 'Q':          case 'Q':
# Line 1928  while (!done) Line 2954  while (!done)
2954          show_malloc = 1;          show_malloc = 1;
2955          continue;          continue;
2956    
2957            case 'Y':
2958            options |= PCRE_NO_START_OPTIMIZE;
2959            continue;
2960    
2961          case 'Z':          case 'Z':
2962          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2963          continue;          continue;
# Line 1948  while (!done) Line 2978  while (!done)
2978        *q++ = c;        *q++ = c;
2979        }        }
2980      *q = 0;      *q = 0;
2981      len = q - dbuffer;      len = (int)(q - dbuffer);
2982    
2983        /* Move the data to the end of the buffer so that a read over the end of
2984        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2985        we are using the POSIX interface, we must include the terminating zero. */
2986    
2987    #if !defined NOPOSIX
2988        if (posix || do_posix)
2989          {
2990          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2991          bptr += buffer_size - len - 1;
2992          }
2993        else
2994    #endif
2995          {
2996          memmove(bptr + buffer_size - len, bptr, len);
2997          bptr += buffer_size - len;
2998          }
2999    
3000      if ((all_use_dfa || use_dfa) && find_match_limit)      if ((all_use_dfa || use_dfa) && find_match_limit)
3001        {        {
# Line 1969  while (!done) Line 3016  while (!done)
3016          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
3017        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
3018        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
3019          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
3020    
3021        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
3022    
# Line 1990  while (!done) Line 3038  while (!done)
3038            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3039              {              {
3040              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3041              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3042                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3043              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3044              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3045                {                {
3046                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3047                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3048                  outfile);                  outfile);
3049                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3050                }                }
# Line 2004  while (!done) Line 3052  while (!done)
3052            }            }
3053          }          }
3054        free(pmatch);        free(pmatch);
3055          goto NEXT_DATA;
3056        }        }
3057    
3058    #endif  /* !defined NOPOSIX */
3059    
3060      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3061    
3062      else  #ifdef SUPPORT_PCRE16
3063  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3064          {
3065          len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3066          if (len < 0)
3067            {
3068            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3069              "converted to UTF-16\n");
3070            goto NEXT_DATA;
3071            }
3072          bptr = (pcre_uint8 *)buffer16;
3073          }
3074    #endif
3075    
3076      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3077        {        {
3078          markptr = NULL;
3079    
3080        if (timeitm > 0)        if (timeitm > 0)
3081          {          {
3082          register int i;          register int i;
# Line 2024  while (!done) Line 3088  while (!done)
3088            {            {
3089            int workspace[1000];            int workspace[1000];
3090            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3091              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3092                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
3093                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
3094            }            }
# Line 2032  while (!done) Line 3096  while (!done)
3096  #endif  #endif
3097    
3098          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3099            count = pcre_exec(re, extra, (char *)bptr, len,            {
3100              PCRE_EXEC(count, re, extra, bptr, len,
3101              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
3102              }
3103          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3104          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3105            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2043  while (!done) Line 3108  while (!done)
3108    
3109        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3110        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3111        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3112          running of pcre_exec(), so disable the JIT optimization. This makes it
3113          possible to run the same set of tests with and without JIT externally
3114          requested. */
3115    
3116        if (find_match_limit)        if (find_match_limit)
3117          {          {
# Line 2052  while (!done) Line 3120  while (!done)
3120            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3121            extra->flags = 0;            extra->flags = 0;
3122            }            }
3123            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3124    
3125          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3126            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2075  while (!done) Line 3144  while (!done)
3144            }            }
3145          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3146          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3147          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3148            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3149          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3150          }          }
# Line 2087  while (!done) Line 3156  while (!done)
3156        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3157          {          {
3158          int workspace[1000];          int workspace[1000];
3159          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
3160            options | g_notempty, use_offsets, use_size_offsets, workspace,            options | g_notempty, use_offsets, use_size_offsets, workspace,
3161            sizeof(workspace)/sizeof(int));            sizeof(workspace)/sizeof(int));
3162          if (count == 0)          if (count == 0)
# Line 2100  while (!done) Line 3169  while (!done)
3169    
3170        else        else
3171          {          {
3172          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3173            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3174          if (count == 0)          if (count == 0)
3175            {            {
3176            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2135  while (!done) Line 3204  while (!done)
3204              }              }
3205            }            }
3206    
3207            /* do_allcaps requests showing of all captures in the pattern, to check
3208            unset ones at the end. */
3209    
3210            if (do_allcaps)
3211              {
3212              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3213              count++;   /* Allow for full match */
3214              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3215              }
3216    
3217            /* Output the captured substrings */
3218    
3219          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
3220            {            {
3221            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
3222                {
3223                if (use_offsets[i] != -1)
3224                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3225                    use_offsets[i], i);
3226                if (use_offsets[i+1] != -1)
3227                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3228                    use_offsets[i+1], i+1);
3229              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3230                }
3231            else            else
3232              {              {
3233              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3234              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr + use_offsets[i],
3235                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3236              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3237              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3238                {                {
3239                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3240                  {                PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3241                  fprintf(outfile, " 0+ ");                  outfile);
3242                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
3243                }                }
3244              }              }
3245            }            }
3246    
3247            if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
3248    
3249          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3250            {            {
3251            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
# Line 2231  while (!done) Line 3319  while (!done)
3319                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3320              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
3321                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
3322              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
3323              }              }
3324            }            }
# Line 2241  while (!done) Line 3328  while (!done)
3328    
3329        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
3330          {          {
3331          fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
3332  #if !defined NODFA            else fprintf(outfile, "Partial match, mark=%s", markptr);
3333          if ((all_use_dfa || use_dfa) && use_size_offsets > 2)          if (use_size_offsets > 1)
3334            fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],            {
3335              bptr + use_offsets[0]);            fprintf(outfile, ": ");
3336  #endif            PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3337                outfile);
3338              }
3339          fprintf(outfile, "\n");          fprintf(outfile, "\n");
3340          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
3341          }          }
# Line 2256  while (!done) Line 3345  while (!done)
3345        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
3346        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
3347    
3348        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
3349        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
3350        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
3351        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
3352        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
3353          newline setting in the pattern; if none was set, use pcre_config() to
3354          find the default.
3355    
3356        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
3357        character, not one byte. */        character, not one byte. */
# Line 2276  while (!done) Line 3367  while (!done)
3367              {              {
3368              int d;              int d;
3369              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
3370              obits = (d == '\r')? PCRE_NEWLINE_CR :              /* Note that these values are always the ASCII ones, even in
3371                      (d == '\n')? PCRE_NEWLINE_LF :              EBCDIC environments. CR = 13, NL = 10. */
3372                      (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :              obits = (d == 13)? PCRE_NEWLINE_CR :
3373                        (d == 10)? PCRE_NEWLINE_LF :
3374                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
3375                      (d == -2)? PCRE_NEWLINE_ANYCRLF :                      (d == -2)? PCRE_NEWLINE_ANYCRLF :
3376                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
3377              }              }
3378            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3379                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3380                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3381                &&                &&
3382                start_offset < len - 1 &&                start_offset < len - 1 &&
3383                bptr[start_offset] == '\r' &&                bptr[start_offset] == '\r' &&
3384                bptr[start_offset+1] == '\n')                bptr[start_offset+1] == '\n')
3385              onechar++;              onechar++;
3386            else if (use_utf8)            else if (use_utf)
3387              {              {
3388              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3389                {                {
3390                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3391                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3392                }                }
3393              }              }
3394            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3395            }            }
3396          else          else
3397            {            {
3398            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3399              {              {
3400              if (gmatched == 0) fprintf(outfile, "No match\n");              case PCRE_ERROR_NOMATCH:
3401                if (gmatched == 0)
3402                  {
3403                  if (markptr == NULL) fprintf(outfile, "No match\n");
3404                    else fprintf(outfile, "No match, mark = %s\n", markptr);
3405                  }
3406                break;
3407    
3408                case PCRE_ERROR_BADUTF8:
3409                case PCRE_ERROR_SHORTUTF8:
3410                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3411                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3412                if (use_size_offsets >= 2)
3413                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3414                    use_offsets[1]);
3415                fprintf(outfile, "\n");
3416                break;
3417    
3418                default:
3419                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3420                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3421                else
3422                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3423                break;
3424              }              }
3425            else fprintf(outfile, "Error %d\n", count);  
3426            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3427            }            }
3428          }          }
# Line 2317  while (!done) Line 3432  while (!done)
3432        if (!do_g && !do_G) break;        if (!do_g && !do_G) break;
3433    
3434        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
3435        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic what
3436        what Perl's /g option does. This turns out to be rather cunning. First        Perl's /g option does. This turns out to be rather cunning. First we set
3437        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the        PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
3438        same point. If this fails (picked up above) we advance to the next        same point. If this fails (picked up above) we advance to the next
3439        character. */        character. */
3440    
# Line 2328  while (!done) Line 3443  while (!done)
3443        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
3444          {          {
3445          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
3446          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
3447          }          }
3448    
3449        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
# Line 2354  while (!done) Line 3469  while (!done)
3469  #endif  #endif
3470    
3471    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3472    if (extra != NULL) new_free(extra);    if (extra != NULL)
3473    if (tables != NULL)      {
3474        PCRE_FREE_STUDY(extra);
3475        }
3476      if (locale_set)
3477      {      {
3478      new_free((void *)tables);      new_free((void *)tables);
3479      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3480      locale_set = 0;      locale_set = 0;
3481      }      }
3482      if (jit_stack != NULL)
3483        {
3484        pcre_jit_stack_free(jit_stack);
3485        jit_stack = NULL;
3486        }
3487    }    }
3488    
3489  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 2375  free(dbuffer); Line 3498  free(dbuffer);
3498  free(pbuffer);  free(pbuffer);
3499  free(offsets);  free(offsets);
3500    
3501    #ifdef SUPPORT_PCRE16
3502    if (buffer16 != NULL) free(buffer16);
3503    #endif
3504    
3505  return yield;  return yield;
3506  }  }
3507    

Legend:
Removed from v.227  
changed lines
  Added in v.810

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12