/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 567 by ph10, Sat Nov 6 17:10:00 2010 UTC revision 909 by ph10, Sat Feb 4 15:01:47 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 79  input mode under Windows. */ Line 90  input mode under Windows. */
90  #define fileno _fileno  #define fileno _fileno
91  #endif  #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95    #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99    /* Not Windows */
100    
101  #else  #else
102  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
103  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 86  input mode under Windows. */ Line 105  input mode under Windows. */
105  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
106  #endif  #endif
107    
108    #define PRIV(name) name
109    
110  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
111  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 97  here before pcre_internal.h so that the Line 117  here before pcre_internal.h so that the
117  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
118    
119  #include "pcre.h"  #include "pcre.h"
120    
121    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122    /* Configure internal macros to 16 bit mode. */
123    #define COMPILE_PCRE16
124    #endif
125    
126  #include "pcre_internal.h"  #include "pcre_internal.h"
127    
128    /* The pcre_printint() function, which prints the internal form of a compiled
129    regex, is held in a separate file so that (a) it can be compiled in either
130    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131    when that is compiled in debug mode. */
132    
133    #ifdef SUPPORT_PCRE8
134    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135    #endif
136    #ifdef SUPPORT_PCRE16
137    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138    #endif
139    
140  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
141  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
142  external symbols to prevent clashes. */  external symbols to prevent clashes. */
143    
144  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
145    
146  #include "pcre_tables.c"  #include "pcre_tables.c"
147    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
148  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
149  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
150  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
151  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
152  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
153    
154  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #ifdef EBCDIC
155    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
156    #else
157    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
158    #endif
159    
160    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162    /* Posix support is disabled in 16 bit only mode. */
163    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164    #define NOPOSIX
165    #endif
166    
167  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
168  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 140  Makefile. */ Line 172  Makefile. */
172  #include "pcreposix.h"  #include "pcreposix.h"
173  #endif  #endif
174    
175  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
176  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
178  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
179  UTF8 support if PCRE is built without it. */  
180    #ifndef SUPPORT_UTF
181  #ifndef SUPPORT_UTF8  #ifndef NOUTF
182  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
183  #endif  #endif
184  #endif  #endif
185    
186    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188    only from one place and is handled differently). I couldn't dream up any way of
189    using a single macro to do this in a generic way, because of the many different
190    argument requirements. We know that at least one of SUPPORT_PCRE8 and
191    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192    use these in the definitions of generic macros.
193    
194    **** Special note about the PCHARSxxx macros: the address of the string to be
195    printed is always given as two arguments: a base address followed by an offset.
196    The base address is cast to the correct data size for 8 or 16 bit data; the
197    offset is in units of this size. If the string were given as base+offset in one
198    argument, the casting might be incorrectly applied. */
199    
200    #ifdef SUPPORT_PCRE8
201    
202    #define PCHARS8(lv, p, offset, len, f) \
203      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
204    
205    #define PCHARSV8(p, offset, len, f) \
206      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
207    
208    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
209      p = read_capture_name8(p, cn8, re)
210    
211    #define STRLEN8(p) ((int)strlen((char *)p))
212    
213    #define SET_PCRE_CALLOUT8(callout) \
214      pcre_callout = callout
215    
216    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
217       pcre_assign_jit_stack(extra, callback, userdata)
218    
219    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
220      re = pcre_compile((char *)pat, options, error, erroffset, tables)
221    
222    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
223        namesptr, cbuffer, size) \
224      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
225        (char *)namesptr, cbuffer, size)
226    
227    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
228      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
229    
230    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
231        offsets, size_offsets, workspace, size_workspace) \
232      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
233        offsets, size_offsets, workspace, size_workspace)
234    
235    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
236        offsets, size_offsets) \
237      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
238        offsets, size_offsets)
239    
240    #define PCRE_FREE_STUDY8(extra) \
241      pcre_free_study(extra)
242    
243    #define PCRE_FREE_SUBSTRING8(substring) \
244      pcre_free_substring(substring)
245    
246    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
247      pcre_free_substring_list(listptr)
248    
249    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
250        getnamesptr, subsptr) \
251      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
252        (char *)getnamesptr, subsptr)
253    
254    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
255      n = pcre_get_stringnumber(re, (char *)ptr)
256    
257    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
258      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
259    
260    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
261      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
262    
263    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
264      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
265    
266    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
267      pcre_printint(re, outfile, debug_lengths)
268    
269    #define PCRE_STUDY8(extra, re, options, error) \
270      extra = pcre_study(re, options, error)
271    
272    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
273      pcre_jit_stack_alloc(startsize, maxsize)
274    
275    #define PCRE_JIT_STACK_FREE8(stack) \
276      pcre_jit_stack_free(stack)
277    
278    #endif /* SUPPORT_PCRE8 */
279    
280    /* -----------------------------------------------------------*/
281    
282    #ifdef SUPPORT_PCRE16
283    
284    #define PCHARS16(lv, p, offset, len, f) \
285      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
286    
287    #define PCHARSV16(p, offset, len, f) \
288      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
289    
290    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
291      p = read_capture_name16(p, cn16, re)
292    
293    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
294    
295    #define SET_PCRE_CALLOUT16(callout) \
296      pcre16_callout = (int (*)(pcre16_callout_block *))callout
297    
298    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
299      pcre16_assign_jit_stack((pcre16_extra *)extra, \
300        (pcre16_jit_callback)callback, userdata)
301    
302    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
304        tables)
305    
306    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
307        namesptr, cbuffer, size) \
308      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
309        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
310    
311    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
312      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
313        (PCRE_UCHAR16 *)cbuffer, size/2)
314    
315    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
316        offsets, size_offsets, workspace, size_workspace) \
317      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
318        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
319        workspace, size_workspace)
320    
321    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
322        offsets, size_offsets) \
323      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
324        len, start_offset, options, offsets, size_offsets)
325    
326    #define PCRE_FREE_STUDY16(extra) \
327      pcre16_free_study((pcre16_extra *)extra)
328    
329    #define PCRE_FREE_SUBSTRING16(substring) \
330      pcre16_free_substring((PCRE_SPTR16)substring)
331    
332    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
333      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
334    
335    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
336        getnamesptr, subsptr) \
337      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
338        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
339    
340    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
341      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
342    
343    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
344      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
345        (PCRE_SPTR16 *)(void*)subsptr)
346    
347    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
348      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
349        (PCRE_SPTR16 **)(void*)listptr)
350    
351    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
353        tables)
354    
355    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
356      pcre16_printint(re, outfile, debug_lengths)
357    
358    #define PCRE_STUDY16(extra, re, options, error) \
359      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
360    
361    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
362      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
363    
364    #define PCRE_JIT_STACK_FREE16(stack) \
365      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
366    
367    #endif /* SUPPORT_PCRE16 */
368    
369    
370    /* ----- Both modes are supported; a runtime test is needed, except for
371    pcre_config(), and the JIT stack functions, when it doesn't matter which
372    version is called. ----- */
373    
374    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
375    
376    #define CHAR_SIZE (use_pcre16? 2:1)
377    
378    #define PCHARS(lv, p, offset, len, f) \
379      if (use_pcre16) \
380        PCHARS16(lv, p, offset, len, f); \
381      else \
382        PCHARS8(lv, p, offset, len, f)
383    
384    #define PCHARSV(p, offset, len, f) \
385      if (use_pcre16) \
386        PCHARSV16(p, offset, len, f); \
387      else \
388        PCHARSV8(p, offset, len, f)
389    
390    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
391      if (use_pcre16) \
392        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
393      else \
394        READ_CAPTURE_NAME8(p, cn8, cn16, re)
395    
396    #define SET_PCRE_CALLOUT(callout) \
397      if (use_pcre16) \
398        SET_PCRE_CALLOUT16(callout); \
399      else \
400        SET_PCRE_CALLOUT8(callout)
401    
402    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
403    
404    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
405      if (use_pcre16) \
406        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
407      else \
408        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
409    
410    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
411      if (use_pcre16) \
412        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
413      else \
414        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
415    
416    #define PCRE_CONFIG pcre_config
417    
418    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
419        namesptr, cbuffer, size) \
420      if (use_pcre16) \
421        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
422          namesptr, cbuffer, size); \
423      else \
424        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
425          namesptr, cbuffer, size)
426    
427    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
428      if (use_pcre16) \
429        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
430      else \
431        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
432    
433    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
434        offsets, size_offsets, workspace, size_workspace) \
435      if (use_pcre16) \
436        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
437          offsets, size_offsets, workspace, size_workspace); \
438      else \
439        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
440          offsets, size_offsets, workspace, size_workspace)
441    
442    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
443        offsets, size_offsets) \
444      if (use_pcre16) \
445        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
446          offsets, size_offsets); \
447      else \
448        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets)
450    
451    #define PCRE_FREE_STUDY(extra) \
452      if (use_pcre16) \
453        PCRE_FREE_STUDY16(extra); \
454      else \
455        PCRE_FREE_STUDY8(extra)
456    
457    #define PCRE_FREE_SUBSTRING(substring) \
458      if (use_pcre16) \
459        PCRE_FREE_SUBSTRING16(substring); \
460      else \
461        PCRE_FREE_SUBSTRING8(substring)
462    
463    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
464      if (use_pcre16) \
465        PCRE_FREE_SUBSTRING_LIST16(listptr); \
466      else \
467        PCRE_FREE_SUBSTRING_LIST8(listptr)
468    
469    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
470        getnamesptr, subsptr) \
471      if (use_pcre16) \
472        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
473          getnamesptr, subsptr); \
474      else \
475        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
476          getnamesptr, subsptr)
477    
478    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
479      if (use_pcre16) \
480        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
481      else \
482        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
483    
484    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
485      if (use_pcre16) \
486        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
487      else \
488        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
489    
490    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
491      if (use_pcre16) \
492        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
493      else \
494        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
495    
496    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
497      (use_pcre16 ? \
498         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
499        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
500    
501    #define PCRE_JIT_STACK_FREE(stack) \
502      if (use_pcre16) \
503        PCRE_JIT_STACK_FREE16(stack); \
504      else \
505        PCRE_JIT_STACK_FREE8(stack)
506    
507    #define PCRE_MAKETABLES \
508      (use_pcre16? pcre16_maketables() : pcre_maketables())
509    
510    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
511      if (use_pcre16) \
512        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
513      else \
514        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
515    
516    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
517      if (use_pcre16) \
518        PCRE_PRINTINT16(re, outfile, debug_lengths); \
519      else \
520        PCRE_PRINTINT8(re, outfile, debug_lengths)
521    
522    #define PCRE_STUDY(extra, re, options, error) \
523      if (use_pcre16) \
524        PCRE_STUDY16(extra, re, options, error); \
525      else \
526        PCRE_STUDY8(extra, re, options, error)
527    
528    /* ----- Only 8-bit mode is supported ----- */
529    
530    #elif defined SUPPORT_PCRE8
531    #define CHAR_SIZE                 1
532    #define PCHARS                    PCHARS8
533    #define PCHARSV                   PCHARSV8
534    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
535    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
536    #define STRLEN                    STRLEN8
537    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
538    #define PCRE_COMPILE              PCRE_COMPILE8
539    #define PCRE_CONFIG               pcre_config
540    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
542    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
543    #define PCRE_EXEC                 PCRE_EXEC8
544    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
545    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
546    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
547    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
548    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
549    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
550    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
551    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
552    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
553    #define PCRE_MAKETABLES           pcre_maketables()
554    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555    #define PCRE_PRINTINT             PCRE_PRINTINT8
556    #define PCRE_STUDY                PCRE_STUDY8
557    
558    /* ----- Only 16-bit mode is supported ----- */
559    
560    #else
561    #define CHAR_SIZE                 2
562    #define PCHARS                    PCHARS16
563    #define PCHARSV                   PCHARSV16
564    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
565    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
566    #define STRLEN                    STRLEN16
567    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
568    #define PCRE_COMPILE              PCRE_COMPILE16
569    #define PCRE_CONFIG               pcre16_config
570    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
572    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
573    #define PCRE_EXEC                 PCRE_EXEC16
574    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
575    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
576    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
577    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
578    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
579    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
580    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
581    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
582    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
583    #define PCRE_MAKETABLES           pcre16_maketables()
584    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585    #define PCRE_PRINTINT             PCRE_PRINTINT16
586    #define PCRE_STUDY                PCRE_STUDY16
587    #endif
588    
589    /* ----- End of mode-specific function call macros ----- */
590    
591    
592  /* Other parameters */  /* Other parameters */
593    
# Line 179  static int debug_lengths; Line 615  static int debug_lengths;
615  static int first_callout;  static int first_callout;
616  static int locale_set = 0;  static int locale_set = 0;
617  static int show_malloc;  static int show_malloc;
618  static int use_utf8;  static int use_utf;
619  static size_t gotten_store;  static size_t gotten_store;
620    static size_t first_gotten_store = 0;
621    static const unsigned char *last_callout_mark = NULL;
622    
623  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
624    
625  static int buffer_size = 50000;  static int buffer_size = 50000;
626  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
627  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
628  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
629    
630    /* Another buffer is needed for translation to 16-bit character strings. It
631    will be obtained and extended as required. */
632    
633    #ifdef SUPPORT_PCRE16
634    static int buffer16_size = 0;
635    static pcre_uint16 *buffer16 = NULL;
636    
637    #ifdef SUPPORT_PCRE8
638    
639    /* We need the table of operator lengths that is used for 16-bit compiling, in
640    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
641    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
642    appropriately for the 16-bit world. Just as a safety check, make sure that
643    COMPILE_PCRE16 is *not* set. */
644    
645    #ifdef COMPILE_PCRE16
646    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
647    #endif
648    
649    #if LINK_SIZE == 2
650    #undef LINK_SIZE
651    #define LINK_SIZE 1
652    #elif LINK_SIZE == 3 || LINK_SIZE == 4
653    #undef LINK_SIZE
654    #define LINK_SIZE 2
655    #else
656    #error LINK_SIZE must be either 2, 3, or 4
657    #endif
658    
659    #undef IMM2_SIZE
660    #define IMM2_SIZE 1
661    
662    #endif /* SUPPORT_PCRE8 */
663    
664    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
665    #endif  /* SUPPORT_PCRE16 */
666    
667    /* If we have 8-bit support, default use_pcre16 to false; if there is also
668    16-bit support, it can be changed by an option. If there is no 8-bit support,
669    there must be 16-bit support, so default it to 1. */
670    
671    #ifdef SUPPORT_PCRE8
672    static int use_pcre16 = 0;
673    #else
674    static int use_pcre16 = 1;
675    #endif
676    
677    /* Textual explanations for runtime error codes */
678    
679    static const char *errtexts[] = {
680      NULL,  /* 0 is no error */
681      NULL,  /* NOMATCH is handled specially */
682      "NULL argument passed",
683      "bad option value",
684      "magic number missing",
685      "unknown opcode - pattern overwritten?",
686      "no more memory",
687      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
688      "match limit exceeded",
689      "callout error code",
690      NULL,  /* BADUTF8/16 is handled specially */
691      NULL,  /* BADUTF8/16 offset is handled specially */
692      NULL,  /* PARTIAL is handled specially */
693      "not used - internal error",
694      "internal error - pattern overwritten?",
695      "bad count value",
696      "item unsupported for DFA matching",
697      "backreference condition or recursion test not supported for DFA matching",
698      "match limit not supported for DFA matching",
699      "workspace size exceeded in DFA matching",
700      "too much recursion for DFA matching",
701      "recursion limit exceeded",
702      "not used - internal error",
703      "invalid combination of newline options",
704      "bad offset value",
705      NULL,  /* SHORTUTF8/16 is handled specially */
706      "nested recursion at the same subject position",
707      "JIT stack limit reached",
708      "pattern compiled in wrong mode: 8-bit/16-bit error"
709    };
710    
711    
712  /*************************************************  /*************************************************
# Line 202  the L (locale) option also adjusts the t Line 721  the L (locale) option also adjusts the t
721  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
722  only ASCII characters. */  only ASCII characters. */
723    
724  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
725    
726  /* This table is a lower casing table. */  /* This table is a lower casing table. */
727    
# Line 375  graph, print, punct, and cntrl. Other cl Line 894  graph, print, punct, and cntrl. Other cl
894  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
895  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
896    
897  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
898  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
899  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
900  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 538  return sys_errlist[n]; Line 1057  return sys_errlist[n];
1057  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1058    
1059    
1060    /*************************************************
1061    *         JIT memory callback                    *
1062    *************************************************/
1063    
1064    static pcre_jit_stack* jit_callback(void *arg)
1065    {
1066    return (pcre_jit_stack *)arg;
1067    }
1068    
1069    
1070    #if !defined NOUTF || defined SUPPORT_PCRE16
1071    /*************************************************
1072    *            Convert UTF-8 string to value       *
1073    *************************************************/
1074    
1075    /* This function takes one or more bytes that represents a UTF-8 character,
1076    and returns the value of the character.
1077    
1078    Argument:
1079      utf8bytes   a pointer to the byte vector
1080      vptr        a pointer to an int to receive the value
1081    
1082    Returns:      >  0 => the number of bytes consumed
1083                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1084    */
1085    
1086    static int
1087    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1088    {
1089    int c = *utf8bytes++;
1090    int d = c;
1091    int i, j, s;
1092    
1093    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1094      {
1095      if ((d & 0x80) == 0) break;
1096      d <<= 1;
1097      }
1098    
1099    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1100    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1101    
1102    /* i now has a value in the range 1-5 */
1103    
1104    s = 6*i;
1105    d = (c & utf8_table3[i]) << s;
1106    
1107    for (j = 0; j < i; j++)
1108      {
1109      c = *utf8bytes++;
1110      if ((c & 0xc0) != 0x80) return -(j+1);
1111      s -= 6;
1112      d |= (c & 0x3f) << s;
1113      }
1114    
1115    /* Check that encoding was the correct unique one */
1116    
1117    for (j = 0; j < utf8_table1_size; j++)
1118      if (d <= utf8_table1[j]) break;
1119    if (j != i) return -(i+1);
1120    
1121    /* Valid value */
1122    
1123    *vptr = d;
1124    return i+1;
1125    }
1126    #endif /* NOUTF || SUPPORT_PCRE16 */
1127    
1128    
1129    
1130    #if !defined NOUTF || defined SUPPORT_PCRE16
1131    /*************************************************
1132    *       Convert character value to UTF-8         *
1133    *************************************************/
1134    
1135    /* This function takes an integer value in the range 0 - 0x7fffffff
1136    and encodes it as a UTF-8 character in 0 to 6 bytes.
1137    
1138    Arguments:
1139      cvalue     the character value
1140      utf8bytes  pointer to buffer for result - at least 6 bytes long
1141    
1142    Returns:     number of characters placed in the buffer
1143    */
1144    
1145    static int
1146    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1147    {
1148    register int i, j;
1149    for (i = 0; i < utf8_table1_size; i++)
1150      if (cvalue <= utf8_table1[i]) break;
1151    utf8bytes += i;
1152    for (j = i; j > 0; j--)
1153     {
1154     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1155     cvalue >>= 6;
1156     }
1157    *utf8bytes = utf8_table2[i] | cvalue;
1158    return i + 1;
1159    }
1160    #endif
1161    
1162    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* In non-UTF mode, the space needed for a 16-bit string is exactly double the
8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
result is always left in buffer16, which is enlarged when it is too small.

That size bound only holds if every character lies wholly within the first len
bytes. A character whose encoding runs past len is therefore rejected as
malformed; accepting it could write beyond the end of buffer16 and would read
input bytes the caller did not offer.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed or truncated
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *pp;

if (buffer16_size < 2*len + 2)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = 2*len + 2;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

pp = buffer16;

/* A non-UTF pattern is widened byte by byte. */

if (!utf && !data)
  {
  while (len-- > 0) *pp++ = *p++;
  }

/* Everything else is decoded as UTF-8, one character at a time. */

else
  {
  int c = 0;
  while (len > 0)
    {
    int chlen = utf82ord(p, &c);

    /* chlen > len means the character extends beyond the supplied bytes. */

    if (chlen <= 0 || chlen > len) return -1;
    if (c > 0x10ffff) return -2;
    p += chlen;
    len -= chlen;
    if (c < 0x10000) *pp++ = c; else
      {
      if (!utf) return -3;
      c -= 0x10000;
      *pp++ = 0xD800 | (c >> 10);       /* high surrogate */
      *pp++ = 0xDC00 | (c & 0x3ff);     /* low surrogate */
      }
    }
  }

*pp = 0;
return (int)(pp - buffer16);
}
#endif
1241    
1242    
1243  /*************************************************  /*************************************************
# Line 563  Returns: pointer to the start of n Line 1263  Returns: pointer to the start of n
1263                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1264  */  */
1265    
1266  static uschar *  static pcre_uint8 *
1267  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1268  {  {
1269  uschar *here = start;  pcre_uint8 *here = start;
1270    
1271  for (;;)  for (;;)
1272    {    {
1273    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1274    
1275    if (rlen > 1000)    if (rlen > 1000)
1276      {      {
# Line 613  for (;;) Line 1313  for (;;)
1313    else    else
1314      {      {
1315      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1316      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1317      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1318      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319    
1320      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1321        {        {
# Line 646  return NULL; /* Control never gets here Line 1346  return NULL; /* Control never gets here
1346    
1347    
1348    
   
   
   
   
1349  /*************************************************  /*************************************************
1350  *          Read number from string               *  *          Read number from string               *
1351  *************************************************/  *************************************************/
# Line 666  Returns: the unsigned long Line 1362  Returns: the unsigned long
1362  */  */
1363    
1364  static int  static int
1365  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1366  {  {
1367  int result = 0;  int result = 0;
1368  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 677  return(result); Line 1373  return(result);
1373    
1374    
1375    
   
1376  /*************************************************  /*************************************************
1377  *            Convert UTF-8 string to value       *  *             Print one character                *
1378  *************************************************/  *************************************************/
1379    
1380  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
1381    
1382  static int  static int pchar(int c, FILE *f)
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1383  {  {
1384  int c = *utf8bytes++;  if (PRINTOK(c))
1385  int d = c;    {
1386  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1387      return 1;
1388      }
1389    
1390  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1391    {    {
1392    if ((d & 0x80) == 0) break;    if (use_utf)
1393    d <<= 1;      {
1394        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1395        return 6;
1396        }
1397      else
1398        {
1399        if (f != NULL) fprintf(f, "\\x%02x", c);
1400        return 4;
1401        }
1402    }    }
1403    
1404  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1405  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1406           (c <= 0x00000fff)? 7 :
1407           (c <= 0x0000ffff)? 8 :
1408           (c <= 0x000fffff)? 9 : 10;
1409    }
1410    
 /* i now has a value in the range 1-5 */  
1411    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1412    
#ifdef SUPPORT_PCRE8
/*************************************************
*         Print 8-bit character string           *
*************************************************/

/* Print a string of bytes by passing each character to pchar(). When use_utf
is set, the bytes are decoded as UTF-8; a sequence that does not decode, or
that would extend beyond the given length, is output one byte at a time. A
negative length means the string is zero-terminated. The yield is the sum of
the pchar() results, and a NULL file just counts without printing. */

static int pchars(pcre_uint8 *p, int length, FILE *f)
{
int c = 0;
int printed = 0;
int remaining = (length < 0)? (int)strlen((char *)p) : length;

while (remaining > 0)
  {
  int used = 0;

#if !defined NOUTF
  if (use_utf)
    {
    int rc = utf82ord(p, &c);
    if (rc > 0 && rc <= remaining) used = rc;   /* Mustn't run over the end */
    }
#endif

  /* Not UTF-8, or the decode was unusable: take a single byte. */

  if (used == 0)
    {
    c = *p;
    used = 1;
    }

  p += used;
  remaining -= used;
  printed += pchar(c, f);
  }

return printed;
}
#endif
1451    
1452    
1453    
#ifdef SUPPORT_PCRE16
/*************************************************
*    Find length of 0-terminated 16-bit string   *
*************************************************/

/* Yields the number of 16-bit items that precede the terminating zero. */

static int strlen16(PCRE_SPTR16 p)
{
PCRE_SPTR16 end = p;
while (*end != 0) end++;
return (int)(end - p);
}
#endif  /* SUPPORT_PCRE16 */
 #endif  
   
1466    
1467    
#ifdef SUPPORT_PCRE16
/*************************************************
*           Print 16-bit character string        *
*************************************************/

/* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
If handed a NULL file, just counts chars without printing. A negative length
means that the string is zero-terminated.

In utf mode a high surrogate (0xD800-0xDBFF) that is followed, within the
given length, by a low surrogate (0xDC00-0xDFFF inclusive) is combined into a
single character. A surrogate that is not part of such a pair is printed as it
stands. */

static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
{
int yield = 0;

if (length < 0)
  length = strlen16(p);

while (length-- > 0)
  {
  int c = *p++ & 0xffff;
#if !defined NOUTF
  if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
    {
    int d = *p & 0xffff;

    /* The upper bound is inclusive: 0xDFFF is a valid low surrogate. */

    if (d >= 0xDC00 && d <= 0xDFFF)
      {
      c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
      length--;
      p++;
      }
    }
#endif
  yield += pchar(c, f);
  }

return yield;
}
#endif  /* SUPPORT_PCRE16 */
1504    
1505    c = *p++;  
1506    if (PRINTHEX(c))  
#ifdef SUPPORT_PCRE8
/*************************************************
*     Read a capture name (8-bit) and check it   *
*************************************************/

/* Copy the run of alphanumeric characters at p into the buffer at *pp and
follow it with two zero bytes. The name is then looked up in the compiled
pattern, and a message is written to outfile if no group has that name.

Arguments:
  p          points to the start of the name in the input text
  pp         points to the output pointer; updated to address the second
               of the two zero bytes written after the name
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
{
pcre_uint8 *name = *pp;
pcre_uint8 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre_get_stringnumber(re, (char *)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE8 */
1530    
1531    
1532    
#ifdef SUPPORT_PCRE16
/*************************************************
*     Read a capture name (16-bit) and check it  *
*************************************************/

/* Note that the text being read is 8-bit. Each alphanumeric byte at p is
widened into the 16-bit buffer at *pp, and the name is followed by two zero
items. The name is then looked up in the compiled pattern, and a message is
written to outfile if no group has that name.

Arguments:
  p          points to the start of the name in the (8-bit) input text
  pp         points to the output pointer; updated to address the second
               of the two zero items written after the name
  re         the compiled pattern

Returns:     pointer to the first input character after the name
*/

static pcre_uint8 *
read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
{
pcre_uint16 *name = *pp;
pcre_uint16 *out = name;

for (; isalnum(*p); p++) *out++ = *p;
out[0] = 0;
out[1] = 0;
out++;

if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)name) < 0)
  {
  fprintf(outfile, "no parentheses with name \"");
  PCHARSV(name, 0, -1, outfile);
  fprintf(outfile, "\"\n");
  }

*pp = out;
return p;
}
#endif  /* SUPPORT_PCRE16 */
1557    
1558    
1559    
# Line 864  if (callout_extra) Line 1582  if (callout_extra)
1582      else      else
1583        {        {
1584        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1585        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1586          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1587        fprintf(f, "\n");        fprintf(f, "\n");
1588        }        }
# Line 877  printed lengths of the substrings. */ Line 1595  printed lengths of the substrings. */
1595    
1596  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1597    
1598  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1599  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1600    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1601    
1602  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1603    
1604  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1605    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1606    
1607  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 920  fprintf(outfile, "%.*s", (cb->next_item_ Line 1638  fprintf(outfile, "%.*s", (cb->next_item_
1638  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1639  first_callout = 0;  first_callout = 0;
1640    
1641    if (cb->mark != last_callout_mark)
1642      {
1643      if (cb->mark == NULL)
1644        fprintf(outfile, "Latest Mark: <unset>\n");
1645      else
1646        {
1647        fprintf(outfile, "Latest Mark: ");
1648        PCHARSV(cb->mark, 0, -1, outfile);
1649        putc('\n', outfile);
1650        }
1651      last_callout_mark = cb->mark;
1652      }
1653    
1654  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1655    {    {
1656    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 939  return (cb->callout_number != callout_fa Line 1670  return (cb->callout_number != callout_fa
1670  *            Local malloc functions              *  *            Local malloc functions              *
1671  *************************************************/  *************************************************/
1672    
1673  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1674  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1675    show_malloc variable is set only during matching. */
1676    
1677  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1678  {  {
1679  void *block = malloc(size);  void *block = malloc(size);
1680  gotten_store = size;  gotten_store = size;
1681    if (first_gotten_store == 0) first_gotten_store = size;
1682  if (show_malloc)  if (show_malloc)
1683    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1684  return block;  return block;
# Line 958  if (show_malloc) Line 1691  if (show_malloc)
1691  free(block);  free(block);
1692  }  }
1693    
   
1694  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1695    
1696  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 981  free(block); Line 1713  free(block);
1713  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1714  *************************************************/  *************************************************/
1715    
1716  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1717    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1718    value, but the code is defensive.
1719    
1720    Arguments:
1721      re        compiled regex
1722      study     study data
1723      option    PCRE_INFO_xxx option
1724      ptr       where to put the data
1725    
1726  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  Returns:    0 when OK, < 0 on error
1727    */
1728    
1729    static int
1730    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1731  {  {
1732  int rc;  int rc;
1733  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1734    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1735    #ifdef SUPPORT_PCRE16
1736      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1737    #else
1738      rc = PCRE_ERROR_BADMODE;
1739    #endif
1740    else
1741    #ifdef SUPPORT_PCRE8
1742      rc = pcre_fullinfo(re, study, option, ptr);
1743    #else
1744      rc = PCRE_ERROR_BADMODE;
1745    #endif
1746    
1747    if (rc < 0)
1748      {
1749      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1750        use_pcre16? "16" : "", option);
1751      if (rc == PCRE_ERROR_BADMODE)
1752        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1753          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1754      }
1755    
1756    return rc;
1757  }  }
1758    
1759    
1760    
1761  /*************************************************  /*************************************************
1762  *         Byte flipping function                 *  *             Swap byte functions                *
1763  *************************************************/  *************************************************/
1764    
1765  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1766  byteflip(unsigned long int value, int n)  value, respectively.
1767    
1768    Arguments:
1769      value        any number
1770    
1771    Returns:       the byte swapped value
1772    */
1773    
1774    static pcre_uint32
1775    swap_uint32(pcre_uint32 value)
1776  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1777  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1778         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1779         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1780         ((value & 0xff000000) >> 24);         (value >> 24);
1781    }
1782    
1783    static pcre_uint16
1784    swap_uint16(pcre_uint16 value)
1785    {
1786    return (value >> 8) | (value << 8);
1787  }  }
1788    
1789    
1790    
1791    /*************************************************
1792    *        Flip bytes in a compiled pattern        *
1793    *************************************************/
1794    
1795    /* This function is called if the 'F' option was present on a pattern that is
1796    to be written to a file. We flip the bytes of all the integer fields in the
1797    regex data block and the study block. In 16-bit mode this also flips relevant
1798    bytes in the pattern itself. This is to make it possible to test PCRE's
1799    ability to reload byte-flipped patterns, e.g. those compiled on a different
1800    architecture. */
1801    
1802    static void
1803    regexflip(pcre *ere, pcre_extra *extra)
1804    {
1805    REAL_PCRE *re = (REAL_PCRE *)ere;
1806    #ifdef SUPPORT_PCRE16
1807    int op;
1808    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1809    int length = re->name_count * re->name_entry_size;
1810    #ifdef SUPPORT_UTF
1811    BOOL utf = (re->options & PCRE_UTF16) != 0;
1812    BOOL utf16_char = FALSE;
1813    #endif /* SUPPORT_UTF */
1814    #endif /* SUPPORT_PCRE16 */
1815    
1816    /* Always flip the bytes in the main data block and study blocks. */
1817    
1818    re->magic_number = REVERSED_MAGIC_NUMBER;
1819    re->size = swap_uint32(re->size);
1820    re->options = swap_uint32(re->options);
1821    re->flags = swap_uint16(re->flags);
1822    re->top_bracket = swap_uint16(re->top_bracket);
1823    re->top_backref = swap_uint16(re->top_backref);
1824    re->first_char = swap_uint16(re->first_char);
1825    re->req_char = swap_uint16(re->req_char);
1826    re->name_table_offset = swap_uint16(re->name_table_offset);
1827    re->name_entry_size = swap_uint16(re->name_entry_size);
1828    re->name_count = swap_uint16(re->name_count);
1829    
1830    if (extra != NULL)
1831      {
1832      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1833      rsd->size = swap_uint32(rsd->size);
1834      rsd->flags = swap_uint32(rsd->flags);
1835      rsd->minlength = swap_uint32(rsd->minlength);
1836      }
1837    
1838    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1839    in the name table, if present, and then in the pattern itself. */
1840    
1841    #ifdef SUPPORT_PCRE16
1842    if (!use_pcre16) return;
1843    
1844    while(TRUE)
1845      {
1846      /* Swap previous characters. */
1847      while (length-- > 0)
1848        {
1849        *ptr = swap_uint16(*ptr);
1850        ptr++;
1851        }
1852    #ifdef SUPPORT_UTF
1853      if (utf16_char)
1854        {
1855        if ((ptr[-1] & 0xfc00) == 0xd800)
1856          {
1857          /* We know that there is only one extra character in UTF-16. */
1858          *ptr = swap_uint16(*ptr);
1859          ptr++;
1860          }
1861        }
1862      utf16_char = FALSE;
1863    #endif /* SUPPORT_UTF */
1864    
1865      /* Get next opcode. */
1866    
1867      length = 0;
1868      op = *ptr;
1869      *ptr++ = swap_uint16(op);
1870    
1871      switch (op)
1872        {
1873        case OP_END:
1874        return;
1875    
1876    #ifdef SUPPORT_UTF
1877        case OP_CHAR:
1878        case OP_CHARI:
1879        case OP_NOT:
1880        case OP_NOTI:
1881        case OP_STAR:
1882        case OP_MINSTAR:
1883        case OP_PLUS:
1884        case OP_MINPLUS:
1885        case OP_QUERY:
1886        case OP_MINQUERY:
1887        case OP_UPTO:
1888        case OP_MINUPTO:
1889        case OP_EXACT:
1890        case OP_POSSTAR:
1891        case OP_POSPLUS:
1892        case OP_POSQUERY:
1893        case OP_POSUPTO:
1894        case OP_STARI:
1895        case OP_MINSTARI:
1896        case OP_PLUSI:
1897        case OP_MINPLUSI:
1898        case OP_QUERYI:
1899        case OP_MINQUERYI:
1900        case OP_UPTOI:
1901        case OP_MINUPTOI:
1902        case OP_EXACTI:
1903        case OP_POSSTARI:
1904        case OP_POSPLUSI:
1905        case OP_POSQUERYI:
1906        case OP_POSUPTOI:
1907        case OP_NOTSTAR:
1908        case OP_NOTMINSTAR:
1909        case OP_NOTPLUS:
1910        case OP_NOTMINPLUS:
1911        case OP_NOTQUERY:
1912        case OP_NOTMINQUERY:
1913        case OP_NOTUPTO:
1914        case OP_NOTMINUPTO:
1915        case OP_NOTEXACT:
1916        case OP_NOTPOSSTAR:
1917        case OP_NOTPOSPLUS:
1918        case OP_NOTPOSQUERY:
1919        case OP_NOTPOSUPTO:
1920        case OP_NOTSTARI:
1921        case OP_NOTMINSTARI:
1922        case OP_NOTPLUSI:
1923        case OP_NOTMINPLUSI:
1924        case OP_NOTQUERYI:
1925        case OP_NOTMINQUERYI:
1926        case OP_NOTUPTOI:
1927        case OP_NOTMINUPTOI:
1928        case OP_NOTEXACTI:
1929        case OP_NOTPOSSTARI:
1930        case OP_NOTPOSPLUSI:
1931        case OP_NOTPOSQUERYI:
1932        case OP_NOTPOSUPTOI:
1933        if (utf) utf16_char = TRUE;
1934    #endif
1935        /* Fall through. */
1936    
1937        default:
1938        length = OP_lengths16[op] - 1;
1939        break;
1940    
1941        case OP_CLASS:
1942        case OP_NCLASS:
1943        /* Skip the character bit map. */
1944        ptr += 32/sizeof(pcre_uint16);
1945        length = 0;
1946        break;
1947    
1948        case OP_XCLASS:
1949        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1950        if (LINK_SIZE > 1)
1951          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1952            - (1 + LINK_SIZE + 1));
1953        else
1954          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1955    
1956        /* Reverse the size of the XCLASS instance. */
1957        *ptr = swap_uint16(*ptr);
1958        ptr++;
1959        if (LINK_SIZE > 1)
1960          {
1961          *ptr = swap_uint16(*ptr);
1962          ptr++;
1963          }
1964    
1965        op = *ptr;
1966        *ptr = swap_uint16(op);
1967        ptr++;
1968        if ((op & XCL_MAP) != 0)
1969          {
1970          /* Skip the character bit map. */
1971          ptr += 32/sizeof(pcre_uint16);
1972          length -= 32/sizeof(pcre_uint16);
1973          }
1974        break;
1975        }
1976      }
1977    /* Control should never reach here in 16 bit mode. */
1978    #endif /* SUPPORT_PCRE16 */
1979    }
1980    
1981    
1982    
1983  /*************************************************  /*************************************************
1984  *        Check match or recursion limit          *  *        Check match or recursion limit          *
1985  *************************************************/  *************************************************/
1986    
1987  static int  static int
1988  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1989    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1990    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1991  {  {
# Line 1029  for (;;) Line 2000  for (;;)
2000    {    {
2001    *limit = mid;    *limit = mid;
2002    
2003    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2004      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2005    
2006    if (count == errnumber)    if (count == errnumber)
# Line 1074  Returns: < 0, = 0, or > 0, according Line 2045  Returns: < 0, = 0, or > 0, according
2045  */  */
2046    
2047  static int  static int
2048  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2049  {  {
2050  while (n--)  while (n--)
2051    {    {
# Line 1101  Returns: appropriate PCRE_NEWLINE_x Line 2072  Returns: appropriate PCRE_NEWLINE_x
2072  */  */
2073    
2074  static int  static int
2075  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2076  {  {
2077  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2078  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2079  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2080  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2081  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2082  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2083  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2084  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2085  return 0;  return 0;
2086  }  }
# Line 1131  printf("If input is a terminal, readline Line 2102  printf("If input is a terminal, readline
2102  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2103  #endif  #endif
2104  printf("\nOptions:\n");  printf("\nOptions:\n");
2105  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2106    printf("  -16      use the 16-bit library\n");
2107    #endif
2108    printf("  -b       show compiled code\n");
2109  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2110    printf("  -C arg   show a specific compile-time option\n");
2111    printf("           and exit with its value. The arg can be:\n");
2112    printf("     linksize     internal link size [2, 3, 4]\n");
2113    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2114    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2115    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2116    printf("     ucp          Unicode Properties supported [0, 1]\n");
2117    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2118    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2119  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2120  #if !defined NODFA  #if !defined NODFA
2121  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1147  printf(" -p use POSIX interface\n Line 2130  printf(" -p use POSIX interface\n
2130  #endif  #endif
2131  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2132  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2133  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2134           "  -s+      force each pattern to be studied, using JIT if available\n"
2135         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2136  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2137  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1167  options, followed by a set of test data, Line 2151  options, followed by a set of test data,
2151  int main(int argc, char **argv)  int main(int argc, char **argv)
2152  {  {
2153  FILE *infile = stdin;  FILE *infile = stdin;
2154    const char *version;
2155  int options = 0;  int options = 0;
2156  int study_options = 0;  int study_options = 0;
2157  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1175  int timeit = 0; Line 2160  int timeit = 0;
2160  int timeitm = 0;  int timeitm = 0;
2161  int showinfo = 0;  int showinfo = 0;
2162  int showstore = 0;  int showstore = 0;
2163    int force_study = -1;
2164    int force_study_options = 0;
2165  int quiet = 0;  int quiet = 0;
2166  int size_offsets = 45;  int size_offsets = 45;
2167  int size_offsets_max;  int size_offsets_max;
# Line 1188  int all_use_dfa = 0; Line 2175  int all_use_dfa = 0;
2175  int yield = 0;  int yield = 0;
2176  int stack_size;  int stack_size;
2177    
2178  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
   
 uschar copynames[1024];  
 uschar getnames[1024];  
2179    
2180  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2181  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2182    that 1024 is plenty long enough for the few names we'll be testing. It is
2183    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2184    for the actual memory, to ensure alignment. */
2185    
2186    pcre_uint16 copynames[1024];
2187    pcre_uint16 getnames[1024];
2188    
2189    #ifdef SUPPORT_PCRE16
2190    pcre_uint16 *cn16ptr;
2191    pcre_uint16 *gn16ptr;
2192    #endif
2193    
2194  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2195  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2196    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2197    pcre_uint8 *cn8ptr;
2198    pcre_uint8 *gn8ptr;
2199    #endif
2200    
2201  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2202  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2203  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2204    
2205    buffer = (pcre_uint8 *)malloc(buffer_size);
2206    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2207    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2208    
2209  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2210    
# Line 1217  it set 0x8000, but then I was advised th Line 2219  it set 0x8000, but then I was advised th
2219  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2220  #endif  #endif
2221    
2222    /* Get the version number: both pcre_version() and pcre16_version() give the
2223    same answer. We just need to ensure that we call one that is available. */
2224    
2225    #ifdef SUPPORT_PCRE8
2226    version = pcre_version();
2227    #else
2228    version = pcre16_version();
2229    #endif
2230    
2231  /* Scan options */  /* Scan options */
2232    
2233  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2234    {    {
2235    unsigned char *endptr;    pcre_uint8 *endptr;
2236    
2237    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
2238      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2239      else if (strcmp(argv[op], "-s+") == 0)
2240        {
2241        force_study = 1;
2242        force_study_options = PCRE_STUDY_JIT_COMPILE;
2243        }
2244      else if (strcmp(argv[op], "-16") == 0)
2245        {
2246    #ifdef SUPPORT_PCRE16
2247        use_pcre16 = 1;
2248    #else
2249        printf("** This version of PCRE was built without 16-bit support\n");
2250        exit(1);
2251    #endif
2252        }
2253    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2254    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
2255    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
# Line 1234  while (argc > 1 && argv[op][0] == '-') Line 2259  while (argc > 1 && argv[op][0] == '-')
2259    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2260  #endif  #endif
2261    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2262        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2263          *endptr == 0))          *endptr == 0))
2264      {      {
2265      op++;      op++;
# Line 1244  while (argc > 1 && argv[op][0] == '-') Line 2269  while (argc > 1 && argv[op][0] == '-')
2269      {      {
2270      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
2271      int temp;      int temp;
2272      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2273                       *endptr == 0))                       *endptr == 0))
2274        {        {
2275        timeitm = temp;        timeitm = temp;
# Line 1255  while (argc > 1 && argv[op][0] == '-') Line 2280  while (argc > 1 && argv[op][0] == '-')
2280      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2281      }      }
2282    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2283        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2284          *endptr == 0))          *endptr == 0))
2285      {      {
2286  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2287      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2288      exit(1);      exit(1);
2289  #else  #else
# Line 1283  while (argc > 1 && argv[op][0] == '-') Line 2308  while (argc > 1 && argv[op][0] == '-')
2308      {      {
2309      int rc;      int rc;
2310      unsigned long int lrc;      unsigned long int lrc;
2311      printf("PCRE version %s\n", pcre_version());  
2312        if (argc > 2)
2313          {
2314          if (strcmp(argv[op + 1], "linksize") == 0)
2315            {
2316            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2317            printf("%d\n", rc);
2318            yield = rc;
2319            goto EXIT;
2320            }
2321          if (strcmp(argv[op + 1], "pcre8") == 0)
2322            {
2323    #ifdef SUPPORT_PCRE8
2324            printf("1\n");
2325            yield = 1;
2326    #else
2327            printf("0\n");
2328            yield = 0;
2329    #endif
2330            goto EXIT;
2331            }
2332          if (strcmp(argv[op + 1], "pcre16") == 0)
2333            {
2334    #ifdef SUPPORT_PCRE16
2335            printf("1\n");
2336            yield = 1;
2337    #else
2338            printf("0\n");
2339            yield = 0;
2340    #endif
2341            goto EXIT;
2342            }
2343          if (strcmp(argv[op + 1], "utf") == 0)
2344            {
2345    #ifdef SUPPORT_PCRE8
2346            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2347            printf("%d\n", rc);
2348            yield = rc;
2349    #else
2350            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2351            printf("%d\n", rc);
2352            yield = rc;
2353    #endif
2354            goto EXIT;
2355            }
2356          if (strcmp(argv[op + 1], "ucp") == 0)
2357            {
2358            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2359            printf("%d\n", rc);
2360            yield = rc;
2361            goto EXIT;
2362            }
2363          if (strcmp(argv[op + 1], "jit") == 0)
2364            {
2365            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2366            printf("%d\n", rc);
2367            yield = rc;
2368            goto EXIT;
2369            }
2370          if (strcmp(argv[op + 1], "newline") == 0)
2371            {
2372            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2373            /* Note that these values are always the ASCII values, even
2374            in EBCDIC environments. CR is 13 and NL is 10. */
2375            printf("%s\n", (rc == 13)? "CR" :
2376              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2377              (rc == -2)? "ANYCRLF" :
2378              (rc == -1)? "ANY" : "???");
2379            goto EXIT;
2380            }
2381          printf("Unknown -C option: %s\n", argv[op + 1]);
2382          goto EXIT;
2383          }
2384    
2385        printf("PCRE version %s\n", version);
2386      printf("Compiled with\n");      printf("Compiled with\n");
2387    
2388    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2389    are set, either both UTFs are supported or both are not supported. */
2390    
2391    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2392        printf("  8-bit and 16-bit support\n");
2393        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2394        if (rc)
2395          printf("  UTF-8 and UTF-16 support\n");
2396        else
2397          printf("  No UTF-8 or UTF-16 support\n");
2398    #elif defined SUPPORT_PCRE8
2399        printf("  8-bit support only\n");
2400      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2401      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2402      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2403        printf("  16-bit support only\n");
2404        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2405        printf("  %sUTF-16 support\n", rc? "" : "No ");
2406    #endif
2407    
2408        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2409      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2410      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2411        if (rc)
2412          {
2413          const char *arch;
2414          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2415          printf("  Just-in-time compiler support: %s\n", arch);
2416          }
2417        else
2418          printf("  No just-in-time compiler support\n");
2419        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2420      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2421      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2422      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2423        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2424        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2425        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2426      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2427      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2428                                       "all Unicode newlines");                                       "all Unicode newlines");
2429      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2430      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2431      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2432      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2433      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2434      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2435      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2436      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2437      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2438      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2439        if (showstore)
2440          {
2441          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2442          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2443          }
2444        printf("\n");
2445      goto EXIT;      goto EXIT;
2446      }      }
2447    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
# Line 1366  if (argc > 2) Line 2499  if (argc > 2)
2499    
2500  /* Set alternative malloc function */  /* Set alternative malloc function */
2501    
2502    #ifdef SUPPORT_PCRE8
2503  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2504  pcre_free = new_free;  pcre_free = new_free;
2505  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2506  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2507    #endif
2508    
2509    #ifdef SUPPORT_PCRE16
2510    pcre16_malloc = new_malloc;
2511    pcre16_free = new_free;
2512    pcre16_stack_malloc = stack_malloc;
2513    pcre16_stack_free = stack_free;
2514    #endif
2515    
2516  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2517    
2518  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2519    
2520  /* Main loop */  /* Main loop */
2521    
# Line 1388  while (!done) Line 2530  while (!done)
2530  #endif  #endif
2531    
2532    const char *error;    const char *error;
2533    unsigned char *markptr;    pcre_uint8 *markptr;
2534    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2535    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2536    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2537      unsigned long int get_options;
2538    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2539    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2540      int do_allcaps = 0;
2541    int do_mark = 0;    int do_mark = 0;
2542    int do_study = 0;    int do_study = 0;
2543      int no_force_study = 0;
2544    int do_debug = debug;    int do_debug = debug;
2545    int do_G = 0;    int do_G = 0;
2546    int do_g = 0;    int do_g = 0;
2547    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2548    int do_showrest = 0;    int do_showrest = 0;
2549      int do_showcaprest = 0;
2550    int do_flip = 0;    int do_flip = 0;
2551    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2552    
2553    use_utf8 = 0;    use_utf = 0;
2554    debug_lengths = 1;    debug_lengths = 1;
2555    
2556    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1419  while (!done) Line 2565  while (!done)
2565    
2566    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2567      {      {
2568      unsigned long int magic, get_options;      pcre_uint32 magic;
2569      uschar sbuf[8];      pcre_uint8 sbuf[8];
2570      FILE *f;      FILE *f;
2571    
2572      p++;      p++;
2573        if (*p == '!')
2574          {
2575          do_debug = TRUE;
2576          do_showinfo = TRUE;
2577          p++;
2578          }
2579    
2580      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2581      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2582      *pp = 0;      *pp = 0;
# Line 1435  while (!done) Line 2588  while (!done)
2588        continue;        continue;
2589        }        }
2590    
2591        first_gotten_store = 0;
2592      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2593    
2594      true_size =      true_size =
# Line 1442  while (!done) Line 2596  while (!done)
2596      true_study_size =      true_study_size =
2597        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2598    
2599      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2600      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2601    
2602      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2603    
2604      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2605      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2606        {        {
2607        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2608          {          {
2609          do_flip = 1;          do_flip = 1;
2610          }          }
# Line 1462  while (!done) Line 2616  while (!done)
2616          }          }
2617        }        }
2618    
2619      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2620        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2621          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
2622    
2623      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      /* Now see if there is any following study data. */
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
     /* Now see if there is any following study data */  
2624    
2625      if (true_study_size != 0)      if (true_study_size != 0)
2626        {        {
# Line 1486  while (!done) Line 2636  while (!done)
2636          {          {
2637          FAIL_READ:          FAIL_READ:
2638          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2639          if (extra != NULL) new_free(extra);          if (extra != NULL)
2640              {
2641              PCRE_FREE_STUDY(extra);
2642              }
2643          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2644          fclose(f);          fclose(f);
2645          continue;          continue;
# Line 1496  while (!done) Line 2649  while (!done)
2649        }        }
2650      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2651    
2652        /* Flip the necessary bytes. */
2653        if (do_flip)
2654          {
2655          int rc;
2656          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2657          if (rc == PCRE_ERROR_BADMODE)
2658            {
2659            /* Simulate the result of the function call below. */
2660            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2661              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2662            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2663              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2664            continue;
2665            }
2666          }
2667    
2668        /* Need to know if UTF-8 for printing data strings. */
2669    
2670        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2671        use_utf = (get_options & PCRE_UTF8) != 0;
2672    
2673      fclose(f);      fclose(f);
2674      goto SHOW_INFO;      goto SHOW_INFO;
2675      }      }
2676    
2677    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2678    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2679    
2680    delimiter = *p++;    delimiter = *p++;
2681    
# Line 1566  while (!done) Line 2740  while (!done)
2740        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2741        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2742    
2743        case '+': do_showrest = 1; break;        case '+':
2744          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2745          break;
2746    
2747          case '=': do_allcaps = 1; break;
2748        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2749        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2750        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1584  while (!done) Line 2762  while (!done)
2762        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2763  #endif  #endif
2764    
2765        case 'S': do_study = 1; break;        case 'S':
2766          if (do_study == 0)
2767            {
2768            do_study = 1;
2769            if (*pp == '+')
2770              {
2771              study_options |= PCRE_STUDY_JIT_COMPILE;
2772              pp++;
2773              }
2774            }
2775          else
2776            {
2777            do_study = 0;
2778            no_force_study = 1;
2779            }
2780          break;
2781    
2782        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2783        case 'W': options |= PCRE_UCP; break;        case 'W': options |= PCRE_UCP; break;
2784        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2785          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2786        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2787        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2788        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2789    
2790        case 'T':        case 'T':
# Line 1623  while (!done) Line 2818  while (!done)
2818          goto SKIP_DATA;          goto SKIP_DATA;
2819          }          }
2820        locale_set = 1;        locale_set = 1;
2821        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2822        pp = ppp;        pp = ppp;
2823        break;        break;
2824    
# Line 1636  while (!done) Line 2831  while (!done)
2831    
2832        case '<':        case '<':
2833          {          {
2834          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2835            {            {
2836            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2837            pp += 3;            pp += 3;
# Line 1664  while (!done) Line 2859  while (!done)
2859    
2860    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2861    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2862    local character tables. */    local character tables. Neither does it have 16-bit support. */
2863    
2864  #if !defined NOPOSIX  #if !defined NOPOSIX
2865    if (posix || do_posix)    if (posix || do_posix)
# Line 1680  while (!done) Line 2875  while (!done)
2875      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2876      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2877    
2878        first_gotten_store = 0;
2879      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2880    
2881      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1699  while (!done) Line 2895  while (!done)
2895  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2896    
2897      {      {
2898      unsigned long int get_options;      /* In 16-bit mode, convert the input. */
2899    
2900    #ifdef SUPPORT_PCRE16
2901        if (use_pcre16)
2902          {
2903          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2904            {
2905            case -1:
2906            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2907              "converted to UTF-16\n");
2908            goto SKIP_DATA;
2909    
2910            case -2:
2911            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2912              "cannot be converted to UTF-16\n");
2913            goto SKIP_DATA;
2914    
2915            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2916            fprintf(outfile, "**Failed: character value greater than 0xffff "
2917              "cannot be converted to 16-bit in non-UTF mode\n");
2918            goto SKIP_DATA;
2919    
2920            default:
2921            break;
2922            }
2923          p = (pcre_uint8 *)buffer16;
2924          }
2925    #endif
2926    
2927        /* Compile many times when timing */
2928    
2929      if (timeit > 0)      if (timeit > 0)
2930        {        {
# Line 1708  while (!done) Line 2933  while (!done)
2933        clock_t start_time = clock();        clock_t start_time = clock();
2934        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2935          {          {
2936          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2937          if (re != NULL) free(re);          if (re != NULL) free(re);
2938          }          }
2939        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1717  while (!done) Line 2942  while (!done)
2942            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2943        }        }
2944    
2945      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2946        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2947    
2948      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2949      if non-interactive. */      if non-interactive. */
# Line 1748  while (!done) Line 2974  while (!done)
2974      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
2975      lines. */      lines. */
2976    
2977      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2978      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
2979        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2980    
2981      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
2982      and remember the store that was got. */      and remember the store that was got. */
2983    
2984      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
2985      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2986    
2987      /* If /S was present, study the regexp to generate additional info to      /* Output code size information if requested */
2988      help with the matching. */  
2989        if (log_store)
2990          fprintf(outfile, "Memory allocation (code space): %d\n",
2991            (int)(first_gotten_store -
2992                  sizeof(REAL_PCRE) -
2993                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2994    
2995        /* If -s or /S was present, study the regex to generate additional info to
2996        help with the matching, unless the pattern has the SS option, which
2997        suppresses the effect of /S (used for a few test patterns where studying is
2998        never sensible). */
2999    
3000      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3001        {        {
3002        if (timeit > 0)        if (timeit > 0)
3003          {          {
# Line 1778  while (!done) Line 3005  while (!done)
3005          clock_t time_taken;          clock_t time_taken;
3006          clock_t start_time = clock();          clock_t start_time = clock();
3007          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3008            extra = pcre_study(re, study_options, &error);            {
3009              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3010              }
3011          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3012          if (extra != NULL) free(extra);          if (extra != NULL)
3013              {
3014              PCRE_FREE_STUDY(extra);
3015              }
3016          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3017            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3018              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3019          }          }
3020        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3021        if (error != NULL)        if (error != NULL)
3022          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3023        else if (extra != NULL)        else if (extra != NULL)
3024            {
3025          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3026            if (log_store)
3027              {
3028              size_t jitsize;
3029              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3030                  jitsize != 0)
3031                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3032              }
3033            }
3034        }        }
3035    
3036      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1805  while (!done) Line 3046  while (!done)
3046        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3047        }        }
3048    
3049      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3050    
3051      SHOW_INFO:      SHOW_INFO:
3052    
3053      if (do_debug)      if (do_debug)
3054        {        {
3055        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3056        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3057        }        }
3058    
3059      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1857  while (!done) Line 3061  while (!done)
3061      if (do_showinfo)      if (do_showinfo)
3062        {        {
3063        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3064        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3065          hascrorlf;          hascrorlf;
3066        int nameentrysize, namecount;        int nameentrysize, namecount;
3067        const uschar *nametable;        const pcre_uint8 *nametable;
3068    
3069        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3070        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3071        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3072        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3073        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3074        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3075        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3076        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3077        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3078        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3079        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3080              != 0)
3081  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3082    
3083        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3084          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1910  while (!done) Line 3093  while (!done)
3093          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3094          while (namecount-- > 0)          while (namecount-- > 0)
3095            {            {
3096            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3097              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3098              GET2(nametable, 0));  #else
3099              int imm2_size = IMM2_SIZE;
3100    #endif
3101              int length = (int)STRLEN(nametable + imm2_size);
3102              fprintf(outfile, "  ");
3103              PCHARSV(nametable, imm2_size, length, outfile);
3104              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3105    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3106              fprintf(outfile, "%3d\n", use_pcre16?
3107                 (int)(((PCRE_SPTR16)nametable)[0])
3108                :((int)nametable[0] << 8) | (int)nametable[1]);
3109              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3110    #else
3111              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3112    #ifdef SUPPORT_PCRE8
3113            nametable += nameentrysize;            nametable += nameentrysize;
3114    #else
3115              nametable += nameentrysize * 2;
3116    #endif
3117    #endif
3118            }            }
3119          }          }
3120    
3121        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3122        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3123    
3124        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3125        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3126    
3127        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3128          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3129            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3130            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3131            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1937  while (!done) Line 3138  while (!done)
3138            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3139            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3140            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3141            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3142            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3143            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3144              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3145            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3146    
3147        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
# Line 1980  while (!done) Line 3182  while (!done)
3182          }          }
3183        else        else
3184          {          {
3185          int ch = first_char & 255;          const char *caseless =
3186          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3187            "" : " (caseless)";            "" : " (caseless)";
3188          if (PRINTHEX(ch))  
3189            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3190              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3191          else          else
3192            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3193              fprintf(outfile, "First char = ");
3194              pchar(first_char, outfile);
3195              fprintf(outfile, "%s\n", caseless);
3196              }
3197          }          }
3198    
3199        if (need_char < 0)        if (need_char < 0)
# Line 1995  while (!done) Line 3202  while (!done)
3202          }          }
3203        else        else
3204          {          {
3205          int ch = need_char & 255;          const char *caseless =
3206          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3207            "" : " (caseless)";            "" : " (caseless)";
3208          if (PRINTHEX(ch))  
3209            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3210              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3211          else          else
3212            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3213              fprintf(outfile, "Need char = ");
3214              pchar(need_char, outfile);
3215              fprintf(outfile, "%s\n", caseless);
3216              }
3217          }          }
3218    
3219        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3220        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3221        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3222        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3223          information unless -i or -d was also present. This means that, except
3224          when auto-callouts are involved, the output from runs with and without
3225          -s should be identical. */
3226    
3227        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3228          {          {
3229          if (extra == NULL)          if (extra == NULL)
3230            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3231          else          else
3232            {            {
3233            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3234            int minlength;            int minlength;
3235    
3236            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3237            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3238    
3239            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3240              {              {
3241              int i;              if (start_bits == NULL)
3242              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3243              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3244                {                {
3245                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3246                  int c = 24;
3247                  fprintf(outfile, "Starting byte set: ");
3248                  for (i = 0; i < 256; i++)
3249                  {                  {
3250                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
3251                    {                    {
3252                    fprintf(outfile, "%c ", i);                    if (c > 75)
3253                    c += 2;                      {
3254                    }                      fprintf(outfile, "\n  ");
3255                  else                      c = 2;
3256                    {                      }
3257                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
3258                    c += 5;                      {
3259                        fprintf(outfile, "%c ", i);
3260                        c += 2;
3261                        }
3262                      else
3263                        {
3264                        fprintf(outfile, "\\x%02x ", i);
3265                        c += 5;
3266                        }
3267                    }                    }
3268                  }                  }
3269                  fprintf(outfile, "\n");
3270                }                }
3271              fprintf(outfile, "\n");              }
3272              }
3273    
3274            /* Show this only if the JIT was set by /S, not by -s. */
3275    
3276            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3277              {
3278              int jit;
3279              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3280                {
3281                if (jit)
3282                  fprintf(outfile, "JIT study was successful\n");
3283                else
3284    #ifdef SUPPORT_JIT
3285                  fprintf(outfile, "JIT study was not successful\n");
3286    #else
3287                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3288    #endif
3289              }              }
3290            }            }
3291          }          }
# Line 2069  while (!done) Line 3304  while (!done)
3304          }          }
3305        else        else
3306          {          {
3307          uschar sbuf[8];          pcre_uint8 sbuf[8];
3308          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3309          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3310          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3311          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3312            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3313          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3314          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3315          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3316          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3317            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3318    
3319          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3320              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2087  while (!done) Line 3323  while (!done)
3323            }            }
3324          else          else
3325            {            {
3326            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3327    
3328              /* If there is study data, write it. */
3329    
3330            if (extra != NULL)            if (extra != NULL)
3331              {              {
3332              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 2097  while (!done) Line 3336  while (!done)
3336                  strerror(errno));                  strerror(errno));
3337                }                }
3338              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3339              }              }
3340            }            }
3341          fclose(f);          fclose(f);
3342          }          }
3343    
3344        new_free(re);        new_free(re);
3345        if (extra != NULL) new_free(extra);        if (extra != NULL)
3346            {
3347            PCRE_FREE_STUDY(extra);
3348            }
3349        if (locale_set)        if (locale_set)
3350          {          {
3351          new_free((void *)tables);          new_free((void *)tables);
# Line 2119  while (!done) Line 3360  while (!done)
3360    
3361    for (;;)    for (;;)
3362      {      {
3363      uschar *q;      pcre_uint8 *q;
3364      uschar *bptr;      pcre_uint8 *bptr;
3365      int *use_offsets = offsets;      int *use_offsets = offsets;
3366      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3367      int callout_data = 0;      int callout_data = 0;
# Line 2132  while (!done) Line 3373  while (!done)
3373      int getlist = 0;      int getlist = 0;
3374      int gmatched = 0;      int gmatched = 0;
3375      int start_offset = 0;      int start_offset = 0;
3376      int start_offset_sign = 1;      int start_offset_sign = 1;
3377      int g_notempty = 0;      int g_notempty = 0;
3378      int use_dfa = 0;      int use_dfa = 0;
3379    
     options = 0;  
   
3380      *copynames = 0;      *copynames = 0;
3381      *getnames = 0;      *getnames = 0;
3382    
3383      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3384      getnamesptr = getnames;      cn16ptr = copynames;
3385        gn16ptr = getnames;
3386    #endif
3387    #ifdef SUPPORT_PCRE8
3388        cn8ptr = copynames8;
3389        gn8ptr = getnames8;
3390    #endif
3391    
3392      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3393      first_callout = 1;      first_callout = 1;
3394        last_callout_mark = NULL;
3395      callout_extra = 0;      callout_extra = 0;
3396      callout_count = 0;      callout_count = 0;
3397      callout_fail_count = 999999;      callout_fail_count = 999999;
3398      callout_fail_id = -1;      callout_fail_id = -1;
3399      show_malloc = 0;      show_malloc = 0;
3400        options = 0;
3401    
3402      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3403        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2186  while (!done) Line 3433  while (!done)
3433        int i = 0;        int i = 0;
3434        int n = 0;        int n = 0;
3435    
3436        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3437          In non-UTF mode, allow the value of the byte to fall through to later,
3438          where values greater than 127 are turned into UTF-8 when running in
3439          16-bit mode. */
3440    
3441          if (c != '\\')
3442            {
3443            if (use_utf)
3444              {
3445              *q++ = c;
3446              continue;
3447              }
3448            }
3449    
3450          /* Handle backslash escapes */
3451    
3452          else switch ((c = *p++))
3453          {          {
3454          case 'a': c =    7; break;          case 'a': c =    7; break;
3455          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2202  while (!done) Line 3465  while (!done)
3465          c -= '0';          c -= '0';
3466          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3467            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3468          break;          break;
3469    
3470          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3471          if (*p == '{')          if (*p == '{')
3472            {            {
3473            unsigned char *pt = p;            pcre_uint8 *pt = p;
3474            c = 0;            c = 0;
3475            while (isxdigit(*(++pt)))  
3476              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3477              when isxdigit() is a macro that refers to its argument more than
3478              once. This is banned by the C Standard, but apparently happens in at
3479              least one MacOS environment. */
3480    
3481              for (pt++; isxdigit(*pt); pt++)
3482                {
3483                if (++i == 9)
3484                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3485                                   "using only the first eight.\n");
3486                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3487                }
3488            if (*pt == '}')            if (*pt == '}')
3489              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3490              p = pt + 1;              p = pt + 1;
3491              break;              break;
3492              }              }
3493            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3494            }            }
 #endif  
3495    
3496          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3497            allows UTF-8 characters to be constructed byte by byte, and also allows
3498            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3499            Otherwise, pass it down to later code so that it can be turned into
3500            UTF-8 when running in 16-bit mode. */
3501    
3502          c = 0;          c = 0;
3503          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3504            {            {
3505            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3506            p++;            p++;
3507            }            }
3508            if (use_utf)
3509              {
3510              *q++ = c;
3511              continue;
3512              }
3513          break;          break;
3514    
3515          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2265  while (!done) Line 3517  while (!done)
3517          continue;          continue;
3518    
3519          case '>':          case '>':
3520          if (*p == '-')          if (*p == '-')
3521            {            {
3522            start_offset_sign = -1;            start_offset_sign = -1;
3523            p++;            p++;
3524            }            }
3525          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3526          start_offset *= start_offset_sign;          start_offset *= start_offset_sign;
3527          continue;          continue;
3528    
3529          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 2290  while (!done) Line 3542  while (!done)
3542            }            }
3543          else if (isalnum(*p))          else if (isalnum(*p))
3544            {            {
3545            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3546            }            }
3547          else if (*p == '+')          else if (*p == '+')
3548            {            {
# Line 2306  while (!done) Line 3551  while (!done)
3551            }            }
3552          else if (*p == '-')          else if (*p == '-')
3553            {            {
3554            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3555            p++;            p++;
3556            }            }
3557          else if (*p == '!')          else if (*p == '!')
# Line 2360  while (!done) Line 3605  while (!done)
3605            }            }
3606          else if (isalnum(*p))          else if (isalnum(*p))
3607            {            {
3608            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3609            while (isalnum(*p)) *npp++ = *p++;            }
3610            *npp++ = 0;          continue;
3611            *npp = 0;  
3612            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3613            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3614              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3615            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3616                && extra->executable_jit != NULL)
3617              {
3618              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3619              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3620              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3621            }            }
3622          continue;          continue;
3623    
# Line 2463  while (!done) Line 3713  while (!done)
3713            }            }
3714          continue;          continue;
3715          }          }
3716        *q++ = c;  
3717          /* We now have a character value in c that may be greater than 255. In
3718          16-bit mode, we always convert characters to UTF-8 so that values greater
3719          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3720          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3721          mode must have come from \x{...} or octal constructs because values from
3722          \x.. get this far only in non-UTF mode. */
3723    
3724    #if !defined NOUTF || defined SUPPORT_PCRE16
3725          if (use_pcre16 || use_utf)
3726            {
3727            pcre_uint8 buff8[8];
3728            int ii, utn;
3729            utn = ord2utf8(c, buff8);
3730            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3731            }
3732          else
3733    #endif
3734            {
3735            if (c > 255)
3736              {
3737              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3738                "and UTF-8 mode is not enabled.\n", c);
3739              fprintf(outfile, "** Truncation will probably give the wrong "
3740                "result.\n");
3741              }
3742            *q++ = c;
3743            }
3744        }        }
3745    
3746        /* Reached end of subject string */
3747    
3748      *q = 0;      *q = 0;
3749      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3750    
# Line 2526  while (!done) Line 3806  while (!done)
3806            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3807              {              {
3808              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3809              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3810                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3811              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3812              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3813                {                {
3814                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3815                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3816                  outfile);                  outfile);
3817                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3818                }                }
# Line 2540  while (!done) Line 3820  while (!done)
3820            }            }
3821          }          }
3822        free(pmatch);        free(pmatch);
3823          goto NEXT_DATA;
3824        }        }
3825    
3826    #endif  /* !defined NOPOSIX */
3827    
3828      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3829    
3830      else  #ifdef SUPPORT_PCRE16
3831  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3832          {
3833          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3834          switch(len)
3835            {
3836            case -1:
3837            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3838              "converted to UTF-16\n");
3839            goto NEXT_DATA;
3840    
3841            case -2:
3842            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3843              "cannot be converted to UTF-16\n");
3844            goto NEXT_DATA;
3845    
3846            case -3:
3847            fprintf(outfile, "**Failed: character value greater than 0xffff "
3848              "cannot be converted to 16-bit in non-UTF mode\n");
3849            goto NEXT_DATA;
3850    
3851            default:
3852            break;
3853            }
3854          bptr = (pcre_uint8 *)buffer16;
3855          }
3856    #endif
3857    
3858      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3859        {        {
# Line 2562  while (!done) Line 3870  while (!done)
3870            {            {
3871            int workspace[1000];            int workspace[1000];
3872            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3873              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3874                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3875                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3876                  (sizeof(workspace)/sizeof(int)));
3877                }
3878            }            }
3879          else          else
3880  #endif  #endif
3881    
3882          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3883            count = pcre_exec(re, extra, (char *)bptr, len,            {
3884              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3885                (options | g_notempty), use_offsets, use_size_offsets);
3886              }
3887          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3888          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3889            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2581  while (!done) Line 3892  while (!done)
3892    
3893        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3894        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3895        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3896          running of pcre_exec(), so disable the JIT optimization. This makes it
3897          possible to run the same set of tests with and without JIT externally
3898          requested. */
3899    
3900        if (find_match_limit)        if (find_match_limit)
3901          {          {
# Line 2590  while (!done) Line 3904  while (!done)
3904            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3905            extra->flags = 0;            extra->flags = 0;
3906            }            }
3907            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3908    
3909          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3910            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2613  while (!done) Line 3928  while (!done)
3928            }            }
3929          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3930          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3931          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3932            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3933          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3934          }          }
# Line 2625  while (!done) Line 3940  while (!done)
3940        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3941          {          {
3942          int workspace[1000];          int workspace[1000];
3943          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3944            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3945            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3946          if (count == 0)          if (count == 0)
3947            {            {
3948            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2638  while (!done) Line 3953  while (!done)
3953    
3954        else        else
3955          {          {
3956          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3957            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3958          if (count == 0)          if (count == 0)
3959            {            {
3960            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2652  while (!done) Line 3967  while (!done)
3967        if (count >= 0)        if (count >= 0)
3968          {          {
3969          int i, maxcount;          int i, maxcount;
3970            void *cnptr, *gnptr;
3971    
3972  #if !defined NODFA  #if !defined NODFA
3973          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2673  while (!done) Line 3989  while (!done)
3989              }              }
3990            }            }
3991    
3992            /* do_allcaps requests showing of all captures in the pattern, to check
3993            unset ones at the end. */
3994    
3995            if (do_allcaps)
3996              {
3997              if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3998                goto SKIP_DATA;
3999              count++;   /* Allow for full match */
4000              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4001              }
4002    
4003            /* Output the captured substrings */
4004    
4005          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
4006            {            {
4007            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
4008                {
4009                if (use_offsets[i] != -1)
4010                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4011                    use_offsets[i], i);
4012                if (use_offsets[i+1] != -1)
4013                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4014                    use_offsets[i+1], i+1);
4015              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
4016                }
4017            else            else
4018              {              {
4019              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4020              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4021                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4022              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4023              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
4024                {                {
4025                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
4026                  {                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4027                  fprintf(outfile, " 0+ ");                  outfile);
4028                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
4029                }                }
4030              }              }
4031            }            }
4032    
4033          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
4034              {
4035              fprintf(outfile, "MK: ");
4036              PCHARSV(markptr, 0, -1, outfile);
4037              fprintf(outfile, "\n");
4038              }
4039    
4040          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4041            {            {
4042            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
4043              {              {
4044                int rc;
4045              char copybuffer[256];              char copybuffer[256];
4046              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4047                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
4048              if (rc < 0)              if (rc < 0)
4049                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4050              else              else
4051                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4052                  fprintf(outfile, "%2dC ", i);
4053                  PCHARSV(copybuffer, 0, rc, outfile);
4054                  fprintf(outfile, " (%d)\n", rc);
4055                  }
4056              }              }
4057            }            }
4058    
4059          for (copynamesptr = copynames;          cnptr = copynames;
4060               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4061            {            {
4062              int rc;
4063            char copybuffer[256];            char copybuffer[256];
4064            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4065              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4066                {
4067                if (*(pcre_uint16 *)cnptr == 0) break;
4068                }
4069              else
4070                {
4071                if (*(pcre_uint8 *)cnptr == 0) break;
4072                }
4073    
4074              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4075                cnptr, copybuffer, sizeof(copybuffer));
4076    
4077            if (rc < 0)            if (rc < 0)
4078              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4079                fprintf(outfile, "copy substring ");
4080                PCHARSV(cnptr, 0, -1, outfile);
4081                fprintf(outfile, " failed %d\n", rc);
4082                }
4083            else            else
4084              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4085                fprintf(outfile, "  C ");
4086                PCHARSV(copybuffer, 0, rc, outfile);
4087                fprintf(outfile, " (%d) ", rc);
4088                PCHARSV(cnptr, 0, -1, outfile);
4089                putc('\n', outfile);
4090                }
4091    
4092              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4093            }            }
4094    
4095          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4096            {            {
4097            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4098              {              {
4099                int rc;
4100              const char *substring;              const char *substring;
4101              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4102              if (rc < 0)              if (rc < 0)
4103                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4104              else              else
4105                {                {
4106                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4107                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4108                  fprintf(outfile, " (%d)\n", rc);
4109                  PCRE_FREE_SUBSTRING(substring);
4110                }                }
4111              }              }
4112            }            }
4113    
4114          for (getnamesptr = getnames;          gnptr = getnames;
4115               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4116            {            {
4117              int rc;
4118            const char *substring;            const char *substring;
4119            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4120              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4121                {
4122                if (*(pcre_uint16 *)gnptr == 0) break;
4123                }
4124              else
4125                {
4126                if (*(pcre_uint8 *)gnptr == 0) break;
4127                }
4128    
4129              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4130                gnptr, &substring);
4131            if (rc < 0)            if (rc < 0)
4132              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4133                fprintf(outfile, "get substring ");
4134                PCHARSV(gnptr, 0, -1, outfile);
4135                fprintf(outfile, " failed %d\n", rc);
4136                }
4137            else            else
4138              {              {
4139              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4140              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4141                fprintf(outfile, " (%d) ", rc);
4142                PCHARSV(gnptr, 0, -1, outfile);
4143                PCRE_FREE_SUBSTRING(substring);
4144                putc('\n', outfile);
4145              }              }
4146    
4147              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4148            }            }
4149    
4150          if (getlist)          if (getlist)
4151            {            {
4152              int rc;
4153            const char **stringlist;            const char **stringlist;
4154            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4155            if (rc < 0)            if (rc < 0)
4156              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4157            else            else
4158              {              {
4159              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4160                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4161                  fprintf(outfile, "%2dL ", i);
4162                  PCHARSV(stringlist[i], 0, -1, outfile);
4163                  putc('\n', outfile);
4164                  }
4165              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4166                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4167              /* free((void *)stringlist); */              PCRE_FREE_SUBSTRING_LIST(stringlist);
             pcre_free_substring_list(stringlist);  
4168              }              }
4169            }            }
4170          }          }
# Line 2782  while (!done) Line 4174  while (!done)
4174        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4175          {          {
4176          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4177            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4178              {
4179              fprintf(outfile, "Partial match, mark=");
4180              PCHARSV(markptr, 0, -1, outfile);
4181              }
4182          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4183            {            {
4184            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4185            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4186              outfile);              outfile);
4187            }            }
4188          fprintf(outfile, "\n");          fprintf(outfile, "\n");
# Line 2799  while (!done) Line 4195  while (!done)
4195        string - that was checked before setting g_notempty.        string - that was checked before setting g_notempty.
4196    
4197        Complication arises in the case when the newline convention is "any",        Complication arises in the case when the newline convention is "any",
4198        "crlf", or "anycrlf". If the previous match was at the end of a line        "crlf", or "anycrlf". If the previous match was at the end of a line
4199        terminated by CRLF, an advance of one character just passes the \r,        terminated by CRLF, an advance of one character just passes the \r,
4200        whereas we should prefer the longer newline sequence, as does the code in        whereas we should prefer the longer newline sequence, as does the code in
4201        pcre_exec(). Fudge the offset value to achieve this. We check for a        pcre_exec(). Fudge the offset value to achieve this. We check for a
4202        newline setting in the pattern; if none was set, use pcre_config() to        newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4203        find the default.        find the default.
4204    
4205        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
# Line 2814  while (!done) Line 4210  while (!done)
4210          if (g_notempty != 0)          if (g_notempty != 0)
4211            {            {
4212            int onechar = 1;            int onechar = 1;
4213            unsigned int obits = ((real_pcre *)re)->options;            unsigned int obits = ((REAL_PCRE *)re)->options;
4214            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
4215            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4216              {              {
4217              int d;              int d;
4218              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4219              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4220              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4221              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 2833  while (!done) Line 4229  while (!done)
4229                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4230                &&                &&
4231                start_offset < len - 1 &&                start_offset < len - 1 &&
4232                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4233                bptr[start_offset+1] == '\n')                (use_pcre16?
4234                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4235                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4236                  :
4237                       bptr[start_offset] == '\r'
4238                    && bptr[start_offset + 1] == '\n')
4239    #elif defined SUPPORT_PCRE16
4240                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4241                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4242    #else
4243                     bptr[start_offset] == '\r'
4244                  && bptr[start_offset + 1] == '\n'
4245    #endif
4246                  )
4247              onechar++;              onechar++;
4248            else if (use_utf8)            else if (use_utf)
4249              {              {
4250              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4251                {                {
4252                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;                if ((bptr[start_offset+onechar] & 0xc0) != 0x80) break;
4253                onechar++;                onechar++;
4254                }                }
4255              }              }
4256            use_offsets[1] = start_offset + onechar;            use_offsets[1] = start_offset + onechar;
4257            }            }
4258          else          else
4259            {            {
4260            if (count == PCRE_ERROR_NOMATCH)            switch(count)
4261              {              {
4262                case PCRE_ERROR_NOMATCH:
4263              if (gmatched == 0)              if (gmatched == 0)
4264                {                {
4265                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL)
4266                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  {
4267                    fprintf(outfile, "No match\n");
4268                    }
4269                  else
4270                    {
4271                    fprintf(outfile, "No match, mark = ");
4272                    PCHARSV(markptr, 0, -1, outfile);
4273                    putc('\n', outfile);
4274                    }
4275                }                }
4276                break;
4277    
4278                case PCRE_ERROR_BADUTF8:
4279                case PCRE_ERROR_SHORTUTF8:
4280                fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4281                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4282                  use_pcre16? "16" : "8");
4283                if (use_size_offsets >= 2)
4284                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4285                    use_offsets[1]);
4286                fprintf(outfile, "\n");
4287                break;
4288    
4289                case PCRE_ERROR_BADUTF8_OFFSET:
4290                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4291                  use_pcre16? "16" : "8");
4292                break;
4293    
4294                default:
4295                if (count < 0 &&
4296                    (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4297                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4298                else
4299                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
4300                break;
4301              }              }
4302            else fprintf(outfile, "Error %d\n", count);  
4303            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
4304            }            }
4305          }          }
# Line 2888  while (!done) Line 4331  while (!done)
4331    
4332        else        else
4333          {          {
4334          bptr += use_offsets[1];          bptr += use_offsets[1] * CHAR_SIZE;
4335          len -= use_offsets[1];          len -= use_offsets[1];
4336          }          }
4337        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
# Line 2903  while (!done) Line 4346  while (!done)
4346  #endif  #endif
4347    
4348    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
4349    if (extra != NULL) new_free(extra);    if (extra != NULL)
4350        {
4351        PCRE_FREE_STUDY(extra);
4352        }
4353    if (locale_set)    if (locale_set)
4354      {      {
4355      new_free((void *)tables);      new_free((void *)tables);
4356      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
4357      locale_set = 0;      locale_set = 0;
4358      }      }
4359      if (jit_stack != NULL)
4360        {
4361        PCRE_JIT_STACK_FREE(jit_stack);
4362        jit_stack = NULL;
4363        }
4364    }    }
4365    
4366  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 2924  free(dbuffer); Line 4375  free(dbuffer);
4375  free(pbuffer);  free(pbuffer);
4376  free(offsets);  free(offsets);
4377    
4378    #ifdef SUPPORT_PCRE16
4379    if (buffer16 != NULL) free(buffer16);
4380    #endif
4381    
4382  return yield;  return yield;
4383  }  }
4384    

Legend:
Removed from v.567  
changed lines
  Added in v.909

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12