/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/pcretest.c revision 553 by ph10, Fri Oct 22 15:57:50 2010 UTC code/branches/pcre16/pcretest.c revision 814 by ph10, Wed Dec 21 12:05:24 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 79  input mode under Windows. */ Line 80  input mode under Windows. */
80  #define fileno _fileno  #define fileno _fileno
81  #endif  #endif
82    
83    /* A user sent this fix for Borland Builder 5 under Windows. */
84    
85    #ifdef __BORLANDC__
86    #define _setmode(handle, mode) setmode(handle, mode)
87    #endif
88    
89    /* Not Windows */
90    
91  #else  #else
92  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
93  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 99  appropriately for an application, not fo Line 108  appropriately for an application, not fo
108  #include "pcre.h"  #include "pcre.h"
109  #include "pcre_internal.h"  #include "pcre_internal.h"
110    
111    /* The pcre_printint() function, which prints the internal form of a compiled
112    regex, is held in a separate file so that (a) it can be compiled in either
113    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114    when that is compiled in debug mode. */
115    
116    #ifdef SUPPORT_PCRE8
117    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
118    #endif
119    #ifdef SUPPORT_PCRE16
120    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
121    #endif
122    
123  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
124  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
125  external symbols to prevent clashes. */  external symbols to prevent clashes. */
126    
127  #define _pcre_ucp_gentype      ucp_gentype  #define _pcre_ucp_gentype      ucp_gentype
128    #define _pcre_ucp_typerange    ucp_typerange
129  #define _pcre_utf8_table1      utf8_table1  #define _pcre_utf8_table1      utf8_table1
130  #define _pcre_utf8_table1_size utf8_table1_size  #define _pcre_utf8_table1_size utf8_table1_size
131  #define _pcre_utf8_table2      utf8_table2  #define _pcre_utf8_table2      utf8_table2
# Line 116  external symbols to prevent clashes. */ Line 138  external symbols to prevent clashes. */
138    
139  #include "pcre_tables.c"  #include "pcre_tables.c"
140    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
141  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
142  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
143  contained in the printint.src file. We uses it here also, in cases when the  the same as in the printint.src file. We uses it here in cases when the locale
144  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
145  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
146    
147    #ifdef EBCDIC
148    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
149    #else
150    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151    #endif
152    
153  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
156  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 140  Makefile. */ Line 160  Makefile. */
160  #include "pcreposix.h"  #include "pcreposix.h"
161  #endif  #endif
162    
163  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
164  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165  interface to the DFA matcher (NODFA), and without the doublecheck of the old  without the interface to the DFA matcher (NODFA), and without the doublecheck
166  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167  UTF8 support if PCRE is built without it. */  out the UTF8 support if PCRE is built without it. */
168    
169  #ifndef SUPPORT_UTF8  #ifndef SUPPORT_UTF8
170  #ifndef NOUTF8  #ifndef NOUTF8
# Line 152  UTF8 support if PCRE is built without it Line 172  UTF8 support if PCRE is built without it
172  #endif  #endif
173  #endif  #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    
185    #define PCHARS8(lv, p, len, f) \
186      lv = pchars((pcre_uint8 *)p, len, f)
187    
188    #define PCHARSV8(p, len, f) \
189      (void)pchars((pcre_uint8 *)p, len, f)
190    
191    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
192      re = pcre_compile((char *)pat, options, error, erroffset, tables)
193    
194    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
195        offsets, size_offsets, workspace, size_workspace) \
196      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
197        offsets, size_offsets, workspace, size_workspace)
198    
199    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
200        offsets, size_offsets) \
201      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
202        offsets, size_offsets)
203    
204    #define PCRE_FREE_STUDY8(extra) \
205      pcre_free_study(extra)
206    
207    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
208      pcre_pattern_to_host_byte_order(re, extra, tables)
209    
210    #define PCRE_STUDY8(extra, re, options, error) \
211      extra = pcre_study(re, options, error)
212    
213    #endif /* SUPPORT_PCRE8 */
214    
215    
216    #ifdef SUPPORT_PCRE16
217    
218    #define PCHARS16(lv, p, len, f) \
219      lv = pchars16((PCRE_SPTR16)p, len, f)
220    
221    #define PCHARSV16(p, len, f) \
222      (void)pchars16((PCRE_SPTR16)p, len, f)
223    
224    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
225      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
226    
227    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
228        offsets, size_offsets, workspace, size_workspace) \
229      count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
230        options, offsets, size_offsets, workspace, size_workspace)
231    
232    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
233        offsets, size_offsets) \
234      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
235        options, offsets, size_offsets)
236    
237    #define PCRE_FREE_STUDY16(extra) \
238      pcre16_free_study(extra)
239    
240    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
241      pcre16_pattern_to_host_byte_order(re, extra, tables)
242    
243    #define PCRE_STUDY16(extra, re, options, error) \
244      extra = pcre16_study(re, options, error)
245    
246    #endif /* SUPPORT_PCRE16 */
247    
248    
249    /* ----- Both modes are supported; a runtime test is needed ----- */
250    
251    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
252    
253    #define PCHARS(lv, p, len, f) \
254      if (use_pcre16) \
255        PCHARS16(lv, p, len, f); \
256      else \
257        PCHARS8(lv, p, len, f)
258    
259    #define PCHARSV(p, len, f) \
260      if (use_pcre16) \
261        PCHARSV16(p, len, f); \
262      else \
263        PCHARSV8(p, len, f)
264    
265    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
266      if (use_pcre16) \
267        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
268      else \
269        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
270    
271    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
272        offsets, size_offsets, workspace, size_workspace) \
273      if (use_pcre16) \
274        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
275          offsets, size_offsets, workspace, size_workspace); \
276      else \
277        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
278          offsets, size_offsets, workspace, size_workspace)
279    
280    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
281        offsets, size_offsets) \
282      if (use_pcre16) \
283        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
284          offsets, size_offsets); \
285      else \
286        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
287          offsets, size_offsets)
288    
289    #define PCRE_FREE_STUDY(extra) \
290      if (use_pcre16) \
291        PCRE_FREE_STUDY16(extra); \
292      else \
293        PCRE_FREE_STUDY8(extra)
294    
295    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
296      if (use_pcre16) \
297        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
298      else \
299        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
300    
301    #define PCRE_STUDY(extra, re, options, error) \
302      if (use_pcre16) \
303        PCRE_STUDY16(extra, re, options, error); \
304      else \
305        PCRE_STUDY8(extra, re, options, error)
306    
307    /* ----- Only 8-bit mode is supported ----- */
308    
309    #elif defined SUPPORT_PCRE8
310    #define PCHARS           PCHARS8
311    #define PCHARSV          PCHARSV8
312    #define PCRE_COMPILE     PCRE_COMPILE8
313    #define PCRE_DFA_EXEC    PCRE_DFA_EXEC8
314    #define PCRE_EXEC        PCRE_EXEC8
315    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
316    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
317    #define PCRE_STUDY       PCRE_STUDY8
318    
319    /* ----- Only 16-bit mode is supported ----- */
320    
321    #else
322    #define PCHARS           PCHARS16
323    #define PCHARSV          PCHARSV16
324    #define PCRE_COMPILE     PCRE_COMPILE16
325    #define PCRE_DFA_EXEC    PCRE_DFA_EXEC16
326    #define PCRE_EXEC        PCRE_EXEC16
327    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
328    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
329    #define PCRE_STUDY       PCRE_STUDY16
330    #endif
331    
332    /* ----- End of mode-specific function call macros ----- */
333    
334    
335  /* Other parameters */  /* Other parameters */
336    
# Line 179  static int debug_lengths; Line 358  static int debug_lengths;
358  static int first_callout;  static int first_callout;
359  static int locale_set = 0;  static int locale_set = 0;
360  static int show_malloc;  static int show_malloc;
361  static int use_utf8;  static int use_utf;
362  static size_t gotten_store;  static size_t gotten_store;
363    static size_t first_gotten_store = 0;
364    static const unsigned char *last_callout_mark = NULL;
365    
366  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
367    
368  static int buffer_size = 50000;  static int buffer_size = 50000;
369  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
370  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
371  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
372    
373    /* Another buffer is needed for translation to 16-bit character strings. It
374    will be obtained and extended as required. */
375    
376    #ifdef SUPPORT_PCRE16
377    static int buffer16_size = 0;
378    static pcre_uint16 *buffer16 = NULL;
379    
380    /* We need the table of operator lengths that is used for 16-bit compiling, in
381    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
382    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
383    appropriately for the 16-bit world. Just as a safety check, make sure that
384    COMPILE_PCRE16 is *not* set. */
385    
386    #ifdef COMPILE_PCRE16
387    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
388    #endif
389    
390    #if LINK_SIZE == 2
391    #undef LINK_SIZE
392    #define LINK_SIZE 1
393    #elif LINK_SIZE == 3 || LINK_SIZE == 4
394    #undef LINK_SIZE
395    #define LINK_SIZE 2
396    #else
397    #error LINK_SIZE must be either 2, 3, or 4
398    #endif
399    
400    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
401    
402    #endif  /* SUPPORT_PCRE16 */
403    
404    /* If we have 8-bit support, default use_pcre16 to false; if there is also
405    16-bit support, it can be changed by an option. If there is no 8-bit support,
406    there must be 16-bit support, so default it to 1. */
407    
408    #ifdef SUPPORT_PCRE8
409    static int use_pcre16 = 0;
410    #else
411    static int use_pcre16 = 1;
412    #endif
413    
414    /* Textual explanations for runtime error codes */
415    
416    static const char *errtexts[] = {
417      NULL,  /* 0 is no error */
418      NULL,  /* NOMATCH is handled specially */
419      "NULL argument passed",
420      "bad option value",
421      "magic number missing",
422      "unknown opcode - pattern overwritten?",
423      "no more memory",
424      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
425      "match limit exceeded",
426      "callout error code",
427      NULL,  /* BADUTF8 is handled specially */
428      "bad UTF-8 offset",
429      NULL,  /* PARTIAL is handled specially */
430      "not used - internal error",
431      "internal error - pattern overwritten?",
432      "bad count value",
433      "item unsupported for DFA matching",
434      "backreference condition or recursion test not supported for DFA matching",
435      "match limit not supported for DFA matching",
436      "workspace size exceeded in DFA matching",
437      "too much recursion for DFA matching",
438      "recursion limit exceeded",
439      "not used - internal error",
440      "invalid combination of newline options",
441      "bad offset value",
442      NULL,  /* SHORTUTF8 is handled specially */
443      "nested recursion at the same subject position",
444      "JIT stack limit reached",
445      "pattern compiled in wrong mode (8-bit/16-bit error)"
446    };
447    
448    
449  /*************************************************  /*************************************************
# Line 202  the L (locale) option also adjusts the t Line 458  the L (locale) option also adjusts the t
458  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
459  only ASCII characters. */  only ASCII characters. */
460    
461  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
462    
463  /* This table is a lower casing table. */  /* This table is a lower casing table. */
464    
# Line 375  graph, print, punct, and cntrl. Other cl Line 631  graph, print, punct, and cntrl. Other cl
631  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
632  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
633    
634  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
635  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
636  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
637  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 516  static const unsigned char tables1[] = { Line 772  static const unsigned char tables1[] = {
772    
773    
774    
775    
776    #ifndef HAVE_STRERROR
777    /*************************************************
778    *     Provide strerror() for non-ANSI libraries  *
779    *************************************************/
780    
781    /* Some old-fashioned systems still around (e.g. SunOS4) don't have strerror()
782    in their libraries, but can provide the same facility by this simple
783    alternative function. */
784    
785    extern int   sys_nerr;
786    extern char *sys_errlist[];
787    
788    char *
789    strerror(int n)
790    {
791    if (n < 0 || n >= sys_nerr) return "unknown error number";
792    return sys_errlist[n];
793    }
794    #endif /* HAVE_STRERROR */
795    
796    
797    /*************************************************
798    *         JIT memory callback                    *
799    *************************************************/
800    
801    static pcre_jit_stack* jit_callback(void *arg)
802    {
803    return (pcre_jit_stack *)arg;
804    }
805    
806    
807    /*************************************************
808    *            Convert UTF-8 string to value       *
809    *************************************************/
810    
811    /* This function takes one or more bytes that represents a UTF-8 character,
812    and returns the value of the character.
813    
814    Argument:
815      utf8bytes   a pointer to the byte vector
816      vptr        a pointer to an int to receive the value
817    
818    Returns:      >  0 => the number of bytes consumed
819                  -6 to 0 => malformed UTF-8 character at offset = (-return)
820    */
821    
822    #if !defined NOUTF8
823    
824    static int
825    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
826    {
827    int c = *utf8bytes++;
828    int d = c;
829    int i, j, s;
830    
831    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
832      {
833      if ((d & 0x80) == 0) break;
834      d <<= 1;
835      }
836    
837    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
838    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
839    
840    /* i now has a value in the range 1-5 */
841    
842    s = 6*i;
843    d = (c & utf8_table3[i]) << s;
844    
845    for (j = 0; j < i; j++)
846      {
847      c = *utf8bytes++;
848      if ((c & 0xc0) != 0x80) return -(j+1);
849      s -= 6;
850      d |= (c & 0x3f) << s;
851      }
852    
853    /* Check that encoding was the correct unique one */
854    
855    for (j = 0; j < utf8_table1_size; j++)
856      if (d <= utf8_table1[j]) break;
857    if (j != i) return -(i+1);
858    
859    /* Valid value */
860    
861    *vptr = d;
862    return i+1;
863    }
864    
865    #endif
866    
867    
868    
869    /*************************************************
870    *       Convert character value to UTF-8         *
871    *************************************************/
872    
873    /* This function takes an integer value in the range 0 - 0x7fffffff
874    and encodes it as a UTF-8 character in 1 to 6 bytes.
875    
876    Arguments:
877      cvalue     the character value
878      utf8bytes  pointer to buffer for result - at least 6 bytes long
879    
880    Returns:     number of characters placed in the buffer
881    */
882    
883    #if !defined NOUTF8
884    
885    static int
886    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
887    {
888    register int i, j;
889    for (i = 0; i < utf8_table1_size; i++)
890      if (cvalue <= utf8_table1[i]) break;
891    utf8bytes += i;
892    for (j = i; j > 0; j--)
893     {
894     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
895     cvalue >>= 6;
896     }
897    *utf8bytes = utf8_table2[i] | cvalue;
898    return i + 1;
899    }
900    
901    #endif
902    
903    
904    
905    #ifdef SUPPORT_PCRE16
906    /*************************************************
907    *         Convert a string to 16-bit             *
908    *************************************************/
909    
910    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
911    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
912    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
913    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
914    result is always left in buffer16.
915    
916    Arguments:
917      p          points to a byte string
918      utf        true if UTF-8 (to be converted to UTF-16)
919      len        number of bytes in the string (excluding trailing zero)
920    
921    Returns:     number of 16-bit data items used (excluding trailing zero)
922                 OR -1 if a UTF-8 string is malformed
923    */
924    
925    static int
926    to16(pcre_uint8 *p, int utf, int len)
927    {
928    pcre_uint16 *pp;
929    
930    if (buffer16_size < 2*len + 2)
931      {
932      if (buffer16 != NULL) free(buffer16);
933      buffer16_size = 2*len + 2;
934      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
935      if (buffer16 == NULL)
936        {
937        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
938        exit(1);
939        }
940      }
941    
942    pp = buffer16;
943    
944    if (!utf)
945      {
946      while (len-- > 0) *pp++ = *p++;
947      }
948    
949    else
950      {
951      int c;
952      while (len > 0)
953        {
954        int chlen = utf82ord(p, &c);
955        if (chlen <= 0) return -1;
956        p += chlen;
957        len -= chlen;
958        if (c < 0x10000) *pp++ = c; else
959          {
960          c -= 0x10000;
961          *pp++ = 0xD800 | (c >> 10);
962          *pp++ = 0xDC00 | (c & 0x3ff);
963          }
964        }
965      }
966    
967    *pp = 0;
968    return pp - buffer16;
969    }
970    #endif
971    
972    
973  /*************************************************  /*************************************************
974  *        Read or extend an input line            *  *        Read or extend an input line            *
975  *************************************************/  *************************************************/
# Line 539  Returns: pointer to the start of n Line 993  Returns: pointer to the start of n
993                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
994  */  */
995    
996  static uschar *  static pcre_uint8 *
997  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
998  {  {
999  uschar *here = start;  pcre_uint8 *here = start;
1000    
1001  for (;;)  for (;;)
1002    {    {
# Line 589  for (;;) Line 1043  for (;;)
1043    else    else
1044      {      {
1045      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1046      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1047      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1048      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1049    
1050      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1051        {        {
# Line 622  return NULL; /* Control never gets here Line 1076  return NULL; /* Control never gets here
1076    
1077    
1078    
   
   
   
   
1079  /*************************************************  /*************************************************
1080  *          Read number from string               *  *          Read number from string               *
1081  *************************************************/  *************************************************/
# Line 642  Returns: the unsigned long Line 1092  Returns: the unsigned long
1092  */  */
1093    
1094  static int  static int
1095  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1096  {  {
1097  int result = 0;  int result = 0;
1098  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 653  return(result); Line 1103  return(result);
1103    
1104    
1105    
   
1106  /*************************************************  /*************************************************
1107  *            Convert UTF-8 string to value       *  *             Print one character                *
1108  *************************************************/  *************************************************/
1109    
1110  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
1111    
1112  Argument:  static int pchar(int c, FILE *f)
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1113  {  {
1114  int c = *utf8bytes++;  if (PRINTOK(c))
 int d = c;  
 int i, j, s;  
   
 for (i = -1; i < 6; i++)               /* i is number of additional bytes */  
1115    {    {
1116    if ((d & 0x80) == 0) break;    if (f != NULL) fprintf(f, "%c", c);
1117    d <<= 1;    return 1;
1118    }    }
1119    
1120  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (c < 0x100)
 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  
   
 /* i now has a value in the range 1-5 */  
   
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
   
 for (j = 0; j < i; j++)  
1121    {    {
1122    c = *utf8bytes++;    if (use_utf)
1123    if ((c & 0xc0) != 0x80) return -(j+1);      {
1124    s -= 6;      if (f != NULL) fprintf(f, "\\x{%02x}", c);
1125    d |= (c & 0x3f) << s;      return 6;
1126        }
1127      else
1128        {
1129        if (f != NULL) fprintf(f, "\\x%02x", c);
1130        return 4;
1131        }
1132    }    }
1133    
1134  /* Check that encoding was the correct unique one */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1135    return (c <= 0x000000ff)? 6 :
1136  for (j = 0; j < utf8_table1_size; j++)         (c <= 0x00000fff)? 7 :
1137    if (d <= utf8_table1[j]) break;         (c <= 0x0000ffff)? 8 :
1138  if (j != i) return -(i+1);         (c <= 0x000fffff)? 9 : 10;
   
 /* Valid value */  
   
 *vptr = d;  
 return i+1;  
1139  }  }
1140    
 #endif  
   
1141    
1142    
1143    #ifdef SUPPORT_PCRE8
1144  /*************************************************  /*************************************************
1145  *       Convert character value to UTF-8         *  *         Print 8-bit character string           *
1146  *************************************************/  *************************************************/
1147    
1148  /* This function takes an integer value in the range 0 - 0x7fffffff  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1149  and encodes it as a UTF-8 character in 0 to 6 bytes.  If handed a NULL file, just counts chars without printing. */
1150    
1151  Arguments:  static int pchars(pcre_uint8 *p, int length, FILE *f)
1152    cvalue     the character value  {
1153    utf8bytes  pointer to buffer for result - at least 6 bytes long  int c = 0;
1154    int yield = 0;
 Returns:     number of characters placed in the buffer  
 */  
1155    
1156    while (length-- > 0)
1157      {
1158  #if !defined NOUTF8  #if !defined NOUTF8
1159      if (use_utf)
1160        {
1161        int rc = utf82ord(p, &c);
1162        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1163          {
1164          length -= rc - 1;
1165          p += rc;
1166          yield += pchar(c, f);
1167          continue;
1168          }
1169        }
1170    #endif
1171      c = *p++;
1172      yield += pchar(c, f);
1173      }
1174    
1175  static int  return yield;
 ord2utf8(int cvalue, uschar *utf8bytes)  
 {  
 register int i, j;  
 for (i = 0; i < utf8_table1_size; i++)  
   if (cvalue <= utf8_table1[i]) break;  
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1176  }  }
   
1177  #endif  #endif
1178    
1179    
1180    
1181    #ifdef SUPPORT_PCRE16
1182  /*************************************************  /*************************************************
1183  *             Print character string             *  *           Print 16-bit character string        *
1184  *************************************************/  *************************************************/
1185    
1186  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1187  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1188    
1189  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1190  {  {
 int c = 0;  
1191  int yield = 0;  int yield = 0;
1192    
1193  while (length-- > 0)  while (length-- > 0)
1194    {    {
1195      int c = *p++ & 0xffff;
1196  #if !defined NOUTF8  #if !defined NOUTF8
1197    if (use_utf8)    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1198      {      {
1199      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1200        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1201        {        {
1202        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1203        p += rc;        length--;
1204        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1205        }        }
1206      }      }
1207  #endif  #endif
1208      yield += pchar(c, f);
    /* Not UTF-8, or malformed UTF-8  */  
   
   c = *p++;  
   if (PRINTHEX(c))  
     {  
     if (f != NULL) fprintf(f, "%c", c);  
     yield++;  
     }  
   else  
     {  
     if (f != NULL) fprintf(f, "\\x%02x", c);  
     yield += 4;  
     }  
1209    }    }
1210    
1211  return yield;  return yield;
1212  }  }
1213    #endif
1214    
1215    
1216    
# Line 840  if (callout_extra) Line 1239  if (callout_extra)
1239      else      else
1240        {        {
1241        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1242        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject + cb->offset_vector[i],
1243          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1244        fprintf(f, "\n");        fprintf(f, "\n");
1245        }        }
# Line 853  printed lengths of the substrings. */ Line 1252  printed lengths of the substrings. */
1252    
1253  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1254    
1255  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, cb->start_match, f);
1256  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject + cb->start_match,
1257    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1258    
1259  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1260    
1261  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject + cb->current_position,
1262    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1263    
1264  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 896  fprintf(outfile, "%.*s", (cb->next_item_ Line 1295  fprintf(outfile, "%.*s", (cb->next_item_
1295  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1296  first_callout = 0;  first_callout = 0;
1297    
1298    if (cb->mark != last_callout_mark)
1299      {
1300      fprintf(outfile, "Latest Mark: %s\n",
1301        (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));
1302      last_callout_mark = cb->mark;
1303      }
1304    
1305  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1306    {    {
1307    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 915  return (cb->callout_number != callout_fa Line 1321  return (cb->callout_number != callout_fa
1321  *            Local malloc functions              *  *            Local malloc functions              *
1322  *************************************************/  *************************************************/
1323    
1324  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1325  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1326    show_malloc variable is set only during matching. */
1327    
1328  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1329  {  {
1330  void *block = malloc(size);  void *block = malloc(size);
1331  gotten_store = size;  gotten_store = size;
1332    if (first_gotten_store == 0) first_gotten_store = size;
1333  if (show_malloc)  if (show_malloc)
1334    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1335  return block;  return block;
# Line 934  if (show_malloc) Line 1342  if (show_malloc)
1342  free(block);  free(block);
1343  }  }
1344    
   
1345  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1346    
1347  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 957  free(block); Line 1364  free(block);
1364  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1365  *************************************************/  *************************************************/
1366    
1367  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1368    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1369    value, but the code is defensive. */
1370    
1371  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1372  {  {
1373  int rc;  int rc;
1374  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1375    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1376    #ifdef SUPPORT_PCRE16
1377      rc = pcre16_fullinfo(re, study, option, ptr);
1378    #else
1379      rc = PCRE_ERROR_BADMODE;
1380    #endif
1381    else
1382    #ifdef SUPPORT_PCRE8
1383      rc = pcre_fullinfo(re, study, option, ptr);
1384    #else
1385      rc = PCRE_ERROR_BADMODE;
1386    #endif
1387    
1388    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1389      use_pcre16? "16" : "", option);
1390  }  }
1391    
1392    
1393    
1394  /*************************************************  /*************************************************
1395  *         Byte flipping function                 *  *             Swap byte functions                *
1396  *************************************************/  *************************************************/
1397    
1398  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16
1399  byteflip(unsigned long int value, int n)  and pcre_uint32 value.
1400    
1401    Arguments:
1402      value        any number
1403    
1404    Returns:       the byte swapped value
1405    */
1406    
1407    static pcre_uint32
1408    swap_uint32(pcre_uint32 value)
1409  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1410  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1411         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1412         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1413         ((value & 0xff000000) >> 24);         (value >> 24);
1414  }  }
1415    
1416    static pcre_uint16
1417    swap_uint16(pcre_uint16 value)
1418    {
1419    return (value >> 8) | (value << 8);
1420    }
1421    
1422    
1423    
1424    /*************************************************
1425    *        Flip bytes in a compiled pattern        *
1426    *************************************************/
1427    
1428    /* This function is called if the 'F' option was present on a pattern that is
1429    to be written to a file. We flip the bytes of all the integer fields in the
1430    regex data block and the study block. In 16-bit mode this also flips relevant
1431    bytes in the pattern itself. This is to make it possible to test PCRE's
1432    ability to reload byte-flipped patterns, e.g. those compiled on a different
1433    architecture. */
1434    
1435    static void
1436    regexflip(pcre *ere, pcre_extra *extra)
1437    {
1438    real_pcre *re = (real_pcre *)ere;
1439    int op;
1440    
1441    #ifdef SUPPORT_PCRE16
1442    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1443    int length = re->name_count * re->name_entry_size;
1444    #ifdef SUPPORT_UTF
1445    BOOL utf = (re->options & PCRE_UTF16) != 0;
1446    BOOL utf16_char = FALSE;
1447    #endif /* SUPPORT_UTF */
1448    #endif /* SUPPORT_PCRE16 */
1449    
1450    /* Always flip the bytes in the main data block and study blocks. */
1451    
1452    re->magic_number = REVERSED_MAGIC_NUMBER;
1453    re->size = swap_uint32(re->size);
1454    re->options = swap_uint32(re->options);
1455    re->flags = swap_uint16(re->flags);
1456    re->top_bracket = swap_uint16(re->top_bracket);
1457    re->top_backref = swap_uint16(re->top_backref);
1458    re->first_char = swap_uint16(re->first_char);
1459    re->req_char = swap_uint16(re->req_char);
1460    re->name_table_offset = swap_uint16(re->name_table_offset);
1461    re->name_entry_size = swap_uint16(re->name_entry_size);
1462    re->name_count = swap_uint16(re->name_count);
1463    
1464    if (extra != NULL)
1465      {
1466      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1467      rsd->size = swap_uint32(rsd->size);
1468      rsd->flags = swap_uint32(rsd->flags);
1469      rsd->minlength = swap_uint32(rsd->minlength);
1470      }
1471    
1472    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1473    in the name table, if present, and then in the pattern itself. */
1474    
1475    #ifdef SUPPORT_PCRE16
1476    if (!use_pcre16) return;
1477    
1478    while(TRUE)
1479      {
1480      /* Swap previous characters. */
1481      while (length-- > 0)
1482        {
1483        *ptr = swap_uint16(*ptr);
1484        ptr++;
1485        }
1486    #ifdef SUPPORT_UTF
1487      if (utf16_char)
1488        {
1489        if ((ptr[-1] & 0xfc00) == 0xd800)
1490          {
1491          /* We know that there is only one extra character in UTF-16. */
1492          *ptr = swap_uint16(*ptr);
1493          ptr++;
1494          }
1495        }
1496      utf16_char = FALSE;
1497    #endif /* SUPPORT_UTF */
1498    
1499      /* Get next opcode. */
1500    
1501      length = 0;
1502      op = *ptr;
1503      *ptr++ = swap_uint16(op);
1504    
1505      switch (op)
1506        {
1507        case OP_END:
1508        return;
1509    
1510        case OP_CHAR:
1511        case OP_CHARI:
1512        case OP_NOT:
1513        case OP_NOTI:
1514        case OP_STAR:
1515        case OP_MINSTAR:
1516        case OP_PLUS:
1517        case OP_MINPLUS:
1518        case OP_QUERY:
1519        case OP_MINQUERY:
1520        case OP_UPTO:
1521        case OP_MINUPTO:
1522        case OP_EXACT:
1523        case OP_POSSTAR:
1524        case OP_POSPLUS:
1525        case OP_POSQUERY:
1526        case OP_POSUPTO:
1527        case OP_STARI:
1528        case OP_MINSTARI:
1529        case OP_PLUSI:
1530        case OP_MINPLUSI:
1531        case OP_QUERYI:
1532        case OP_MINQUERYI:
1533        case OP_UPTOI:
1534        case OP_MINUPTOI:
1535        case OP_EXACTI:
1536        case OP_POSSTARI:
1537        case OP_POSPLUSI:
1538        case OP_POSQUERYI:
1539        case OP_POSUPTOI:
1540        case OP_NOTSTAR:
1541        case OP_NOTMINSTAR:
1542        case OP_NOTPLUS:
1543        case OP_NOTMINPLUS:
1544        case OP_NOTQUERY:
1545        case OP_NOTMINQUERY:
1546        case OP_NOTUPTO:
1547        case OP_NOTMINUPTO:
1548        case OP_NOTEXACT:
1549        case OP_NOTPOSSTAR:
1550        case OP_NOTPOSPLUS:
1551        case OP_NOTPOSQUERY:
1552        case OP_NOTPOSUPTO:
1553        case OP_NOTSTARI:
1554        case OP_NOTMINSTARI:
1555        case OP_NOTPLUSI:
1556        case OP_NOTMINPLUSI:
1557        case OP_NOTQUERYI:
1558        case OP_NOTMINQUERYI:
1559        case OP_NOTUPTOI:
1560        case OP_NOTMINUPTOI:
1561        case OP_NOTEXACTI:
1562        case OP_NOTPOSSTARI:
1563        case OP_NOTPOSPLUSI:
1564        case OP_NOTPOSQUERYI:
1565        case OP_NOTPOSUPTOI:
1566    #ifdef SUPPORT_UTF
1567        if (utf) utf16_char = TRUE;
1568    #endif
1569        length = OP_lengths16[op] - 1;
1570        break;
1571    
1572        case OP_CLASS:
1573        case OP_NCLASS:
1574        /* Skip the character bit map. */
1575        ptr += 32/sizeof(pcre_uint16);
1576        length = 0;
1577        break;
1578    
1579        case OP_XCLASS:
1580        /* Reverse the size of the XCLASS instance. */
1581        ptr++;
1582        *ptr = swap_uint16(*ptr);
1583        if (LINK_SIZE > 1)
1584          {
1585          /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1586          ptr++;
1587          *ptr = swap_uint16(*ptr);
1588          }
1589        ptr++;
1590    
1591        if (LINK_SIZE > 1)
1592          length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1593            (1 + LINK_SIZE + 1);
1594        else
1595          length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1596    
1597        op = *ptr;
1598        *ptr = swap_uint16(op);
1599        if ((op & XCL_MAP) != 0)
1600          {
1601          /* Skip the character bit map. */
1602          ptr += 32/sizeof(pcre_uint16);
1603          length -= 32/sizeof(pcre_uint16);
1604          }
1605        break;
1606    
1607        default:
1608        length = OP_lengths16[op] - 1;
1609        break;
1610        }
1611      }
1612    /* Control should never reach here in 16 bit mode. */
1613    #endif /* SUPPORT_PCRE16 */
1614    }
1615    
1616    
1617    
# Line 990  return ((value & 0x000000ff) << 24) | Line 1620  return ((value & 0x000000ff) << 24) |
1620  *************************************************/  *************************************************/
1621    
1622  static int  static int
1623  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1624    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1625    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1626  {  {
# Line 1005  for (;;) Line 1635  for (;;)
1635    {    {
1636    *limit = mid;    *limit = mid;
1637    
1638    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1639      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
1640    
1641    if (count == errnumber)    if (count == errnumber)
# Line 1050  Returns: < 0, = 0, or > 0, according Line 1680  Returns: < 0, = 0, or > 0, according
1680  */  */
1681    
1682  static int  static int
1683  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
1684  {  {
1685  while (n--)  while (n--)
1686    {    {
# Line 1077  Returns: appropriate PCRE_NEWLINE_x Line 1707  Returns: appropriate PCRE_NEWLINE_x
1707  */  */
1708    
1709  static int  static int
1710  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
1711  {  {
1712  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
1713  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
1714  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
1715  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
1716  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
1717  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
1718  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
1719  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
1720  return 0;  return 0;
1721  }  }
# Line 1107  printf("If input is a terminal, readline Line 1737  printf("If input is a terminal, readline
1737  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
1738  #endif  #endif
1739  printf("\nOptions:\n");  printf("\nOptions:\n");
1740    #ifdef SUPPORT_PCRE16
1741    printf("  -16      use 16-bit interface\n");
1742    #endif
1743  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
1744  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
1745  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
# Line 1123  printf(" -p use POSIX interface\n Line 1756  printf(" -p use POSIX interface\n
1756  #endif  #endif
1757  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
1758  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
1759  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
1760           "  -s+      force each pattern to be studied, using JIT if available\n"
1761         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
1762  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
1763  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1151  int timeit = 0; Line 1785  int timeit = 0;
1785  int timeitm = 0;  int timeitm = 0;
1786  int showinfo = 0;  int showinfo = 0;
1787  int showstore = 0;  int showstore = 0;
1788    int force_study = -1;
1789    int force_study_options = 0;
1790  int quiet = 0;  int quiet = 0;
1791  int size_offsets = 45;  int size_offsets = 45;
1792  int size_offsets_max;  int size_offsets_max;
# Line 1164  int all_use_dfa = 0; Line 1800  int all_use_dfa = 0;
1800  int yield = 0;  int yield = 0;
1801  int stack_size;  int stack_size;
1802    
1803    pcre_jit_stack *jit_stack = NULL;
1804    
1805  /* These vectors store, end-to-end, a list of captured substring names. Assume  /* These vectors store, end-to-end, a list of captured substring names. Assume
1806  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. */
1807    
1808  uschar copynames[1024];  pcre_uchar copynames[1024];
1809  uschar getnames[1024];  pcre_uchar getnames[1024];
   
 uschar *copynamesptr;  
 uschar *getnamesptr;  
1810    
1811  /* Get buffers from malloc() so that Electric Fence will check their misuse  pcre_uchar *copynamesptr;
1812  when I am debugging. They grow automatically when very long lines are read. */  pcre_uchar *getnamesptr;
1813    
1814  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
1815  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
1816  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
1817    
1818    buffer = (pcre_uint8 *)malloc(buffer_size);
1819    dbuffer = (pcre_uint8 *)malloc(buffer_size);
1820    pbuffer = (pcre_uint8 *)malloc(buffer_size);
1821    
1822  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
1823    
# Line 1197  _setmode( _fileno( stdout ), _O_BINARY ) Line 1836  _setmode( _fileno( stdout ), _O_BINARY )
1836    
1837  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1838    {    {
1839    unsigned char *endptr;    pcre_uint8 *endptr;
1840    
1841      if (strcmp(argv[op], "-m") == 0) showstore = 1;
1842      else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1843      else if (strcmp(argv[op], "-s+") == 0)
1844        {
1845        force_study = 1;
1846        force_study_options = PCRE_STUDY_JIT_COMPILE;
1847        }
1848    #ifdef SUPPORT_PCRE16
1849      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1850    #endif
1851    
   if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)  
     showstore = 1;  
1852    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1853    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1854    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
# Line 1210  while (argc > 1 && argv[op][0] == '-') Line 1858  while (argc > 1 && argv[op][0] == '-')
1858    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1859  #endif  #endif
1860    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1861        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1862          *endptr == 0))          *endptr == 0))
1863      {      {
1864      op++;      op++;
# Line 1220  while (argc > 1 && argv[op][0] == '-') Line 1868  while (argc > 1 && argv[op][0] == '-')
1868      {      {
1869      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
1870      int temp;      int temp;
1871      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1872                       *endptr == 0))                       *endptr == 0))
1873        {        {
1874        timeitm = temp;        timeitm = temp;
# Line 1231  while (argc > 1 && argv[op][0] == '-') Line 1879  while (argc > 1 && argv[op][0] == '-')
1879      if (both) timeit = timeitm;      if (both) timeit = timeitm;
1880      }      }
1881    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1882        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1883          *endptr == 0))          *endptr == 0))
1884      {      {
1885  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
1886      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
1887      exit(1);      exit(1);
1888  #else  #else
# Line 1261  while (argc > 1 && argv[op][0] == '-') Line 1909  while (argc > 1 && argv[op][0] == '-')
1909      unsigned long int lrc;      unsigned long int lrc;
1910      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1911      printf("Compiled with\n");      printf("Compiled with\n");
1912    
1913    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1914    are set, either both UTFs are supported or both are not supported. */
1915    
1916    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1917        printf("  8-bit and 16-bit support\n");
1918        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1919        if (rc)
1920          printf("  UTF-8 and UTF-16 support\n");
1921        else
1922          printf("  No UTF-8 or UTF-16 support\n");
1923    #elif defined SUPPORT_PCRE8
1924        printf("  8-bit support only\n");
1925      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1926      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1927    #else
1928        printf("  16-bit support only\n");
1929        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1930        printf("  %sUTF-16 support\n", rc? "" : "No ");
1931    #endif
1932    
1933      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1934      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1935        (void)pcre_config(PCRE_CONFIG_JIT, &rc);
1936        if (rc)
1937          printf("  Just-in-time compiler support\n");
1938        else
1939          printf("  No just-in-time compiler support\n");
1940      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
1941      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
1942      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
# Line 1342  if (argc > 2) Line 2014  if (argc > 2)
2014    
2015  /* Set alternative malloc function */  /* Set alternative malloc function */
2016    
2017    #ifdef SUPPORT_PCRE8
2018  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2019  pcre_free = new_free;  pcre_free = new_free;
2020  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2021  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2022    #endif
2023    
2024    #ifdef SUPPORT_PCRE16
2025    pcre16_malloc = new_malloc;
2026    pcre16_free = new_free;
2027    pcre16_stack_malloc = stack_malloc;
2028    pcre16_stack_free = stack_free;
2029    #endif
2030    
2031  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2032    
# Line 1364  while (!done) Line 2045  while (!done)
2045  #endif  #endif
2046    
2047    const char *error;    const char *error;
2048    unsigned char *markptr;    pcre_uint8 *markptr;
2049    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2050    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2051    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2052    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2053    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2054      int do_allcaps = 0;
2055    int do_mark = 0;    int do_mark = 0;
2056    int do_study = 0;    int do_study = 0;
2057      int no_force_study = 0;
2058    int do_debug = debug;    int do_debug = debug;
2059    int do_G = 0;    int do_G = 0;
2060    int do_g = 0;    int do_g = 0;
2061    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2062    int do_showrest = 0;    int do_showrest = 0;
2063      int do_showcaprest = 0;
2064    int do_flip = 0;    int do_flip = 0;
2065    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2066    
2067    use_utf8 = 0;    use_utf = 0;
2068    debug_lengths = 1;    debug_lengths = 1;
2069    
2070    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1396  while (!done) Line 2080  while (!done)
2080    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2081      {      {
2082      unsigned long int magic, get_options;      unsigned long int magic, get_options;
2083      uschar sbuf[8];      pcre_uint8 sbuf[8];
2084      FILE *f;      FILE *f;
2085    
2086      p++;      p++;
# Line 1419  while (!done) Line 2103  while (!done)
2103        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2104    
2105      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
2106      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2107    
2108      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2109    
2110      magic = ((real_pcre *)re)->magic_number;      magic = ((real_pcre *)re)->magic_number;
2111      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2112        {        {
2113        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2114          {          {
2115          do_flip = 1;          do_flip = 1;
2116          }          }
# Line 1438  while (!done) Line 2122  while (!done)
2122          }          }
2123        }        }
2124    
2125      fprintf(outfile, "Compiled regex%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2126        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
2127    
2128      /* Need to know if UTF-8 for printing data strings */      /* Now see if there is any following study data. */
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2129    
2130      if (true_study_size != 0)      if (true_study_size != 0)
2131        {        {
# Line 1462  while (!done) Line 2141  while (!done)
2141          {          {
2142          FAIL_READ:          FAIL_READ:
2143          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2144          if (extra != NULL) new_free(extra);          if (extra != NULL)
2145              {
2146              PCRE_FREE_STUDY(extra);
2147              }
2148          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2149          fclose(f);          fclose(f);
2150          continue;          continue;
# Line 1472  while (!done) Line 2154  while (!done)
2154        }        }
2155      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2156    
2157        /* Flip the necessary bytes. */
2158        if (do_flip)
2159          {
2160          PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2161          }
2162    
2163        /* Need to know if UTF-8 for printing data strings */
2164    
2165        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2166        use_utf = (get_options & PCRE_UTF8) != 0;
2167    
2168      fclose(f);      fclose(f);
2169      goto SHOW_INFO;      goto SHOW_INFO;
2170      }      }
2171    
2172    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2173    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2174    
2175    delimiter = *p++;    delimiter = *p++;
2176    
# Line 1542  while (!done) Line 2235  while (!done)
2235        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2236        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2237    
2238        case '+': do_showrest = 1; break;        case '+':
2239          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2240          break;
2241    
2242          case '=': do_allcaps = 1; break;
2243        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2244        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2245        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1560  while (!done) Line 2257  while (!done)
2257        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2258  #endif  #endif
2259    
2260        case 'S': do_study = 1; break;        case 'S':
2261          if (do_study == 0)
2262            {
2263            do_study = 1;
2264            if (*pp == '+')
2265              {
2266              study_options |= PCRE_STUDY_JIT_COMPILE;
2267              pp++;
2268              }
2269            }
2270          else
2271            {
2272            do_study = 0;
2273            no_force_study = 1;
2274            }
2275          break;
2276    
2277        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2278        case 'W': options |= PCRE_UCP; break;        case 'W': options |= PCRE_UCP; break;
2279        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2280          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2281        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2282        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2283        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2284    
2285        case 'T':        case 'T':
# Line 1612  while (!done) Line 2326  while (!done)
2326    
2327        case '<':        case '<':
2328          {          {
2329          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2330            {            {
2331            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2332            pp += 3;            pp += 3;
# Line 1640  while (!done) Line 2354  while (!done)
2354    
2355    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2356    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2357    local character tables. */    local character tables. Neither does it have 16-bit support. */
2358    
2359  #if !defined NOPOSIX  #if !defined NOPOSIX
2360    if (posix || do_posix)    if (posix || do_posix)
# Line 1656  while (!done) Line 2370  while (!done)
2370      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2371      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2372    
2373        first_gotten_store = 0;
2374      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2375    
2376      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1677  while (!done) Line 2392  while (!done)
2392      {      {
2393      unsigned long int get_options;      unsigned long int get_options;
2394    
2395        /* In 16-bit mode, convert the input. */
2396    
2397    #ifdef SUPPORT_PCRE16
2398        if (use_pcre16)
2399          {
2400          if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2401            {
2402            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2403              "converted to UTF-16\n");
2404            goto SKIP_DATA;
2405            }
2406          p = (pcre_uint8 *)buffer16;
2407          }
2408    #endif
2409    
2410        /* Compile many times when timing */
2411    
2412      if (timeit > 0)      if (timeit > 0)
2413        {        {
2414        register int i;        register int i;
# Line 1684  while (!done) Line 2416  while (!done)
2416        clock_t start_time = clock();        clock_t start_time = clock();
2417        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2418          {          {
2419          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2420          if (re != NULL) free(re);          if (re != NULL) free(re);
2421          }          }
2422        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1693  while (!done) Line 2425  while (!done)
2425            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2426        }        }
2427    
2428      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2429        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2430    
2431      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2432      if non-interactive. */      if non-interactive. */
# Line 1725  while (!done) Line 2458  while (!done)
2458      lines. */      lines. */
2459    
2460      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2461      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2462    
2463        /* Extract the size for possible writing before possibly flipping it,
2464        and remember the store that was got. */
2465    
2466        true_size = ((real_pcre *)re)->size;
2467        regex_gotten_store = first_gotten_store;
2468    
2469      /* Print information if required. There are now two info-returning      /* Output code size information if requested */
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
2470    
2471      if (log_store)      if (log_store)
2472        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
2473          (int)(gotten_store -          (int)(first_gotten_store -
2474                sizeof(real_pcre) -                sizeof(real_pcre) -
2475                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2476    
2477      /* Extract the size for possible writing before possibly flipping it,      /* If -s or /S was present, study the regex to generate additional info to
2478      and remember the store that was got. */      help with the matching, unless the pattern has the SS option, which
2479        suppresses the effect of /S (used for a few test patterns where studying is
2480      true_size = ((real_pcre *)re)->size;      never sensible). */
     regex_gotten_store = gotten_store;  
   
     /* If /S was present, study the regexp to generate additional info to  
     help with the matching. */  
2481    
2482      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
2483        {        {
2484        if (timeit > 0)        if (timeit > 0)
2485          {          {
# Line 1754  while (!done) Line 2487  while (!done)
2487          clock_t time_taken;          clock_t time_taken;
2488          clock_t start_time = clock();          clock_t start_time = clock();
2489          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2490            extra = pcre_study(re, study_options, &error);            {
2491              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2492              }
2493          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2494          if (extra != NULL) free(extra);          if (extra != NULL)
2495              {
2496              PCRE_FREE_STUDY(extra);
2497              }
2498          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2499            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2500              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2501          }          }
2502        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2503        if (error != NULL)        if (error != NULL)
2504          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2505        else if (extra != NULL)        else if (extra != NULL)
2506            {
2507          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2508            if (log_store)
2509              {
2510              size_t jitsize;
2511              new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize);
2512              if (jitsize != 0)
2513                fprintf(outfile, "Memory allocation (JIT code): %d\n", jitsize);
2514              }
2515            }
2516        }        }
2517    
2518      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1781  while (!done) Line 2528  while (!done)
2528        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
2529        }        }
2530    
2531      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
2532    
2533      SHOW_INFO:      SHOW_INFO:
2534    
2535      if (do_debug)      if (do_debug)
2536        {        {
2537        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
2538    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2539          if (use_pcre16)
2540            pcre16_printint(re, outfile, debug_lengths);
2541          else
2542            pcre_printint(re, outfile, debug_lengths);
2543    #elif defined SUPPORT_PCRE8
2544        pcre_printint(re, outfile, debug_lengths);        pcre_printint(re, outfile, debug_lengths);
2545    #else
2546          pcre16_printint(re, outfile, debug_lengths);
2547    #endif
2548        }        }
2549    
2550      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1839  while (!done) Line 2558  while (!done)
2558        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
2559          hascrorlf;          hascrorlf;
2560        int nameentrysize, namecount;        int nameentrysize, namecount;
2561        const uschar *nametable;        const pcre_uchar *nametable;
2562    
2563        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
2564        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1853  while (!done) Line 2572  while (!done)
2572        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
2573        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
2574    
2575          /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2576          that it gives the same results as the new function. */
2577    
2578  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2579        old_count = pcre_info(re, &old_options, &old_first_char);        if (!use_pcre16)
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
2580          {          {
2581          if (old_count != count) fprintf(outfile,          old_count = pcre_info(re, &old_options, &old_first_char);
2582            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,          if (count < 0) fprintf(outfile,
2583              old_count);            "Error %d from pcre_info()\n", count);
2584            else
2585          if (old_first_char != first_char) fprintf(outfile,            {
2586            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            if (old_count != count) fprintf(outfile,
2587              first_char, old_first_char);              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2588                  old_count);
2589          if (old_options != (int)get_options) fprintf(outfile,  
2590            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            if (old_first_char != first_char) fprintf(outfile,
2591              get_options, old_options);              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2592                  first_char, old_first_char);
2593    
2594              if (old_options != (int)get_options) fprintf(outfile,
2595                "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2596                  get_options, old_options);
2597              }
2598          }          }
2599  #endif  #endif
2600    
# Line 1897  while (!done) Line 2622  while (!done)
2622        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
2623    
2624        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
2625        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
2626    
2627        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
2628          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
2629            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
2630            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
2631            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1913  while (!done) Line 2638  while (!done)
2638            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2639            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2640            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2641            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2642            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2643            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2644              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2645            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2646    
2647        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1956  while (!done) Line 2682  while (!done)
2682          }          }
2683        else        else
2684          {          {
2685          int ch = first_char & 255;          const char *caseless =
2686          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2687            "" : " (caseless)";            "" : " (caseless)";
2688          if (PRINTHEX(ch))  
2689            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
2690              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2691          else          else
2692            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
2693              fprintf(outfile, "First char = ");
2694              pchar(first_char, outfile);
2695              fprintf(outfile, "%s\n", caseless);
2696              }
2697          }          }
2698    
2699        if (need_char < 0)        if (need_char < 0)
# Line 1971  while (!done) Line 2702  while (!done)
2702          }          }
2703        else        else
2704          {          {
2705          int ch = need_char & 255;          const char *caseless =
2706          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2707            "" : " (caseless)";            "" : " (caseless)";
2708          if (PRINTHEX(ch))  
2709            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
2710              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2711          else          else
2712            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
2713              fprintf(outfile, "Need char = ");
2714              pchar(need_char, outfile);
2715              fprintf(outfile, "%s\n", caseless);
2716              }
2717          }          }
2718    
2719        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
2720        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
2721        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
2722        flipped.) */        flipped.) If study was forced by an external -s, don't show this
2723          information unless -i or -d was also present. This means that, except
2724          when auto-callouts are involved, the output from runs with and without
2725          -s should be identical. */
2726    
2727        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
2728          {          {
2729          if (extra == NULL)          if (extra == NULL)
2730            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
2731          else          else
2732            {            {
2733            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
2734            int minlength;            int minlength;
2735    
2736            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
# Line 2014  while (!done) Line 2753  while (!done)
2753                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2754                    c = 2;                    c = 2;
2755                    }                    }
2756                  if (PRINTHEX(i) && i != ' ')                  if (PRINTOK(i) && i != ' ')
2757                    {                    {
2758                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2759                    c += 2;                    c += 2;
# Line 2029  while (!done) Line 2768  while (!done)
2768              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2769              }              }
2770            }            }
2771    
2772            /* Show this only if the JIT was set by /S, not by -s. */
2773    
2774            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
2775              {
2776              int jit;
2777              new_info(re, extra, PCRE_INFO_JIT, &jit);
2778              if (jit)
2779                fprintf(outfile, "JIT study was successful\n");
2780              else
2781    #ifdef SUPPORT_JIT
2782                fprintf(outfile, "JIT study was not successful\n");
2783    #else
2784                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
2785    #endif
2786              }
2787          }          }
2788        }        }
2789    
# Line 2045  while (!done) Line 2800  while (!done)
2800          }          }
2801        else        else
2802          {          {
2803          uschar sbuf[8];          pcre_uint8 sbuf[8];
2804          sbuf[0] = (uschar)((true_size >> 24) & 255);  
2805          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
2806          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
2807          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
2808            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
2809          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
2810          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
2811          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
2812          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
2813            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
2814    
2815          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
2816              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2063  while (!done) Line 2819  while (!done)
2819            }            }
2820          else          else
2821            {            {
2822            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
2823    
2824              /* If there is study data, write it. */
2825    
2826            if (extra != NULL)            if (extra != NULL)
2827              {              {
2828              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 2073  while (!done) Line 2832  while (!done)
2832                  strerror(errno));                  strerror(errno));
2833                }                }
2834              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
2835              }              }
2836            }            }
2837          fclose(f);          fclose(f);
2838          }          }
2839    
2840        new_free(re);        new_free(re);
2841        if (extra != NULL) new_free(extra);        if (extra != NULL)
2842            {
2843            PCRE_FREE_STUDY(extra);
2844            }
2845        if (locale_set)        if (locale_set)
2846          {          {
2847          new_free((void *)tables);          new_free((void *)tables);
# Line 2095  while (!done) Line 2856  while (!done)
2856    
2857    for (;;)    for (;;)
2858      {      {
2859      uschar *q;      pcre_uint8 *q;
2860      uschar *bptr;      pcre_uint8 *bptr;
2861      int *use_offsets = offsets;      int *use_offsets = offsets;
2862      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
2863      int callout_data = 0;      int callout_data = 0;
# Line 2108  while (!done) Line 2869  while (!done)
2869      int getlist = 0;      int getlist = 0;
2870      int gmatched = 0;      int gmatched = 0;
2871      int start_offset = 0;      int start_offset = 0;
2872        int start_offset_sign = 1;
2873      int g_notempty = 0;      int g_notempty = 0;
2874      int use_dfa = 0;      int use_dfa = 0;
2875    
# Line 2121  while (!done) Line 2883  while (!done)
2883    
2884      pcre_callout = callout;      pcre_callout = callout;
2885      first_callout = 1;      first_callout = 1;
2886        last_callout_mark = NULL;
2887      callout_extra = 0;      callout_extra = 0;
2888      callout_count = 0;      callout_count = 0;
2889      callout_fail_count = 999999;      callout_fail_count = 999999;
# Line 2179  while (!done) Line 2942  while (!done)
2942            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2943    
2944  #if !defined NOUTF8  #if !defined NOUTF8
2945          if (use_utf8 && c > 255)          if (use_utf && c > 255)
2946            {            {
2947            unsigned char buff8[8];            pcre_uint8 buff8[8];
2948            int ii, utn;            int ii, utn;
2949            utn = ord2utf8(c, buff8);            utn = ord2utf8(c, buff8);
2950            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
# Line 2197  while (!done) Line 2960  while (!done)
2960  #if !defined NOUTF8  #if !defined NOUTF8
2961          if (*p == '{')          if (*p == '{')
2962            {            {
2963            unsigned char *pt = p;            pcre_uint8 *pt = p;
2964            c = 0;            c = 0;
2965            while (isxdigit(*(++pt)))  
2966              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
2967              when isxdigit() is a macro that refers to its argument more than
2968              once. This is banned by the C Standard, but apparently happens in at
2969              least one MacOS environment. */
2970    
2971              for (pt++; isxdigit(*pt); pt++)
2972                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2973            if (*pt == '}')            if (*pt == '}')
2974              {              {
2975              unsigned char buff8[8];              pcre_uint8 buff8[8];
2976              int ii, utn;              int ii, utn;
2977              if (use_utf8)              if (use_utf)
2978                {                {
2979                utn = ord2utf8(c, buff8);                utn = ord2utf8(c, buff8);
2980                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
# Line 2214  while (!done) Line 2983  while (!done)
2983              else              else
2984               {               {
2985               if (c > 255)               if (c > 255)
2986                 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "                 {
2987                   "UTF-8 mode is not enabled.\n"                 if (use_pcre16)
2988                   "** Truncation will probably give the wrong result.\n", c);                   fprintf(outfile, "** Character \\x{%x} is greater than 255.\n"
2989                       "** Because its input is first processed as 8-bit, pcretest "
2990                       "does not\n** support such characters in 16-bit mode when "
2991                       "UTF-16 is not set.\n", c);
2992                   else
2993                     fprintf(outfile, "** Character \\x{%x} is greater than 255 "
2994                       "and UTF-8 mode is not enabled.\n", c);
2995    
2996                   fprintf(outfile, "** Truncation will probably give the wrong "
2997                     "result.\n");
2998                   }
2999               }               }
3000              p = pt + 1;              p = pt + 1;
3001              break;              break;
# Line 2230  while (!done) Line 3009  while (!done)
3009          c = 0;          c = 0;
3010          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3011            {            {
3012            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3013            p++;            p++;
3014            }            }
3015          break;          break;
# Line 2240  while (!done) Line 3019  while (!done)
3019          continue;          continue;
3020    
3021          case '>':          case '>':
3022            if (*p == '-')
3023              {
3024              start_offset_sign = -1;
3025              p++;
3026              }
3027          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3028            start_offset *= start_offset_sign;
3029          continue;          continue;
3030    
3031          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 2259  while (!done) Line 3044  while (!done)
3044            }            }
3045          else if (isalnum(*p))          else if (isalnum(*p))
3046            {            {
3047            uschar *npp = copynamesptr;            pcre_uchar *npp = copynamesptr;
3048            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
3049            *npp++ = 0;            *npp++ = 0;
3050            *npp = 0;            *npp = 0;
# Line 2329  while (!done) Line 3114  while (!done)
3114            }            }
3115          else if (isalnum(*p))          else if (isalnum(*p))
3116            {            {
3117            uschar *npp = getnamesptr;            pcre_uchar *npp = getnamesptr;
3118            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
3119            *npp++ = 0;            *npp++ = 0;
3120            *npp = 0;            *npp = 0;
# Line 2340  while (!done) Line 3125  while (!done)
3125            }            }
3126          continue;          continue;
3127    
3128            case 'J':
3129            while(isdigit(*p)) n = n * 10 + *p++ - '0';
3130            if (extra != NULL
3131                && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3132                && extra->executable_jit != NULL)
3133              {
3134              if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);
3135              jit_stack = pcre_jit_stack_alloc(1, n * 1024);
3136              pcre_assign_jit_stack(extra, jit_callback, jit_stack);
3137              }
3138            continue;
3139    
3140          case 'L':          case 'L':
3141          getlist = 1;          getlist = 1;
3142          continue;          continue;
# Line 2495  while (!done) Line 3292  while (!done)
3292            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3293              {              {
3294              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3295              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3296                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3297              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3298              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3299                {                {
3300                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3301                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3302                  outfile);                  outfile);
3303                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3304                }                }
# Line 2509  while (!done) Line 3306  while (!done)
3306            }            }
3307          }          }
3308        free(pmatch);        free(pmatch);
3309          goto NEXT_DATA;
3310        }        }
3311    
3312    #endif  /* !defined NOPOSIX */
3313    
3314      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3315    
3316      else  #ifdef SUPPORT_PCRE16
3317  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3318          {
3319          len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3320          if (len < 0)
3321            {
3322            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3323              "converted to UTF-16\n");
3324            goto NEXT_DATA;
3325            }
3326          bptr = (pcre_uint8 *)buffer16;
3327          }
3328    #endif
3329    
3330      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3331        {        {
# Line 2531  while (!done) Line 3342  while (!done)
3342            {            {
3343            int workspace[1000];            int workspace[1000];
3344            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3345              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3346                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3347                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3348                  (sizeof(workspace)/sizeof(int)));
3349                }
3350            }            }
3351          else          else
3352  #endif  #endif
3353    
3354          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3355            count = pcre_exec(re, extra, (char *)bptr, len,            {
3356              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3357                (options | g_notempty), use_offsets, use_size_offsets);
3358              }
3359          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3360          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3361            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2550  while (!done) Line 3364  while (!done)
3364    
3365        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3366        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3367        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3368          running of pcre_exec(), so disable the JIT optimization. This makes it
3369          possible to run the same set of tests with and without JIT externally
3370          requested. */
3371    
3372        if (find_match_limit)        if (find_match_limit)
3373          {          {
# Line 2559  while (!done) Line 3376  while (!done)
3376            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3377            extra->flags = 0;            extra->flags = 0;
3378            }            }
3379            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3380    
3381          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3382            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2582  while (!done) Line 3400  while (!done)
3400            }            }
3401          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3402          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3403          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3404            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3405          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3406          }          }
# Line 2594  while (!done) Line 3412  while (!done)
3412        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3413          {          {
3414          int workspace[1000];          int workspace[1000];
3415          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3416            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3417            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3418          if (count == 0)          if (count == 0)
3419            {            {
3420            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2607  while (!done) Line 3425  while (!done)
3425    
3426        else        else
3427          {          {
3428          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3429            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3430          if (count == 0)          if (count == 0)
3431            {            {
3432            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2642  while (!done) Line 3460  while (!done)
3460              }              }
3461            }            }
3462    
3463            /* do_allcaps requests showing of all captures in the pattern, to check
3464            unset ones at the end. */
3465    
3466            if (do_allcaps)
3467              {
3468              new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
3469              count++;   /* Allow for full match */
3470              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3471              }
3472    
3473            /* Output the captured substrings */
3474    
3475          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
3476            {            {
3477            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
3478                {
3479                if (use_offsets[i] != -1)
3480                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3481                    use_offsets[i], i);
3482                if (use_offsets[i+1] != -1)
3483                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
3484                    use_offsets[i+1], i+1);
3485              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
3486                }
3487            else            else
3488              {              {
3489              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3490              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr + use_offsets[i],
3491                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3492              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3493              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
3494                {                {
3495                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
3496                  {                PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3497                  fprintf(outfile, " 0+ ");                  outfile);
3498                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
3499                }                }
3500              }              }
3501            }            }
3502    
3503          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
3504              {
3505              int mplen;
3506              if (use_pcre16)
3507                {
3508                pcre_uint16 *mp = (pcre_uint16 *)markptr;
3509                mplen = 0;
3510                while (*mp++ != 0) mplen++;
3511                }
3512              else mplen = (int)strlen((char *)markptr);
3513              fprintf(outfile, "MK: ");
3514              PCHARSV(markptr, mplen, outfile);
3515              fprintf(outfile, "\n");
3516              }
3517    
3518          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3519            {            {
# Line 2740  while (!done) Line 3588  while (!done)
3588                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
3589              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
3590                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
             /* free((void *)stringlist); */  
3591              pcre_free_substring_list(stringlist);              pcre_free_substring_list(stringlist);
3592              }              }
3593            }            }
# Line 2755  while (!done) Line 3602  while (!done)
3602          if (use_size_offsets > 1)          if (use_size_offsets > 1)
3603            {            {
3604            fprintf(outfile, ": ");            fprintf(outfile, ": ");
3605            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3606              outfile);              outfile);
3607            }            }
3608          fprintf(outfile, "\n");          fprintf(outfile, "\n");
# Line 2767  while (!done) Line 3614  while (!done)
3614        to advance the start offset, and continue. We won't be at the end of the        to advance the start offset, and continue. We won't be at the end of the
3615        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
3616    
3617        Complication arises in the case when the newline option is "any" or        Complication arises in the case when the newline convention is "any",
3618        "anycrlf". If the previous match was at the end of a line terminated by        "crlf", or "anycrlf". If the previous match was at the end of a line
3619        CRLF, an advance of one character just passes the \r, whereas we should        terminated by CRLF, an advance of one character just passes the \r,
3620        prefer the longer newline sequence, as does the code in pcre_exec().        whereas we should prefer the longer newline sequence, as does the code in
3621        Fudge the offset value to achieve this.        pcre_exec(). Fudge the offset value to achieve this. We check for a
3622          newline setting in the pattern; if none was set, use pcre_config() to
3623          find the default.
3624    
3625        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
3626        character, not one byte. */        character, not one byte. */
# Line 2796  while (!done) Line 3645  while (!done)
3645                      (d == -1)? PCRE_NEWLINE_ANY : 0;                      (d == -1)? PCRE_NEWLINE_ANY : 0;
3646              }              }
3647            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||            if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
3648                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_CRLF ||
3649                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
3650                &&                &&
3651                start_offset < len - 1 &&                start_offset < len - 1 &&
3652                bptr[start_offset] == '\r' &&                bptr[start_offset] == '\r' &&
3653                bptr[start_offset+1] == '\n')                bptr[start_offset+1] == '\n')
3654              onechar++;              onechar++;
3655            else if (use_utf8)            else if (use_utf)
3656              {              {
3657              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3658                {                {
3659                int tb = bptr[start_offset+onechar];                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
3660                if (tb <= 127) break;                onechar++;
               tb &= 0xc0;  
               if (tb != 0 && tb != 0xc0) onechar++;  
3661                }                }
3662              }              }
3663            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
3664            }            }
3665          else          else
3666            {            {
3667            if (count == PCRE_ERROR_NOMATCH)            switch(count)
3668              {              {
3669                case PCRE_ERROR_NOMATCH:
3670              if (gmatched == 0)              if (gmatched == 0)
3671                {                {
3672                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL) fprintf(outfile, "No match\n");
3673                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  else fprintf(outfile, "No match, mark = %s\n", markptr);
3674                }                }
3675                break;
3676    
3677                case PCRE_ERROR_BADUTF8:
3678                case PCRE_ERROR_SHORTUTF8:
3679                fprintf(outfile, "Error %d (%s UTF-8 string)", count,
3680                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short");
3681                if (use_size_offsets >= 2)
3682                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
3683                    use_offsets[1]);
3684                fprintf(outfile, "\n");
3685                break;
3686    
3687                default:
3688                if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
3689                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
3690                else
3691                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
3692                break;
3693              }              }
3694            else fprintf(outfile, "Error %d\n", count);  
3695            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
3696            }            }
3697          }          }
# Line 2871  while (!done) Line 3738  while (!done)
3738  #endif  #endif
3739    
3740    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3741    if (extra != NULL) new_free(extra);    if (extra != NULL)
3742        {
3743        PCRE_FREE_STUDY(extra);
3744        }
3745    if (locale_set)    if (locale_set)
3746      {      {
3747      new_free((void *)tables);      new_free((void *)tables);
3748      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
3749      locale_set = 0;      locale_set = 0;
3750      }      }
3751      if (jit_stack != NULL)
3752        {
3753        pcre_jit_stack_free(jit_stack);
3754        jit_stack = NULL;
3755        }
3756    }    }
3757    
3758  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 2892  free(dbuffer); Line 3767  free(dbuffer);
3767  free(pbuffer);  free(pbuffer);
3768  free(offsets);  free(offsets);
3769    
3770    #ifdef SUPPORT_PCRE16
3771    if (buffer16 != NULL) free(buffer16);
3772    #endif
3773    
3774  return yield;  return yield;
3775  }  }
3776    

Legend:
Removed from v.553  
Changed lines
  Added in v.814

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12