/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 645 by ph10, Sun Jul 31 17:02:18 2011 UTC revision 922 by ph10, Mon Feb 20 18:44:42 2012 UTC
# Line 1  Line 1 
1  /*************************************************  /*.************************************************
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 94  input mode under Windows. */ Line 105  input mode under Windows. */
105  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
106  #endif  #endif
107    
108    #define PRIV(name) name
109    
110  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
111  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 105  here before pcre_internal.h so that the Line 117  here before pcre_internal.h so that the
117  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
118    
119  #include "pcre.h"  #include "pcre.h"
120    
121    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122    /* Configure internal macros to 16 bit mode. */
123    #define COMPILE_PCRE16
124    #endif
125    
126  #include "pcre_internal.h"  #include "pcre_internal.h"
127    
128    /* The pcre_printint() function, which prints the internal form of a compiled
129    regex, is held in a separate file so that (a) it can be compiled in either
130    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131    when that is compiled in debug mode. */
132    
133    #ifdef SUPPORT_PCRE8
134    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135    #endif
136    #ifdef SUPPORT_PCRE16
137    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138    #endif
139    
140  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
141  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
142  external symbols to prevent clashes. */  external symbols to prevent clashes. */
143    
144  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
145    
146  #include "pcre_tables.c"  #include "pcre_tables.c"
147    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
148  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
149  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
150  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
151  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
152  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
153    
154    #ifdef EBCDIC
155    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
156    #else
157    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
158    #endif
159    
160    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* Posix support is disabled in 16 bit only mode. */
163    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164    #define NOPOSIX
165    #endif
166    
167  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
168  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 148  Makefile. */ Line 172  Makefile. */
172  #include "pcreposix.h"  #include "pcreposix.h"
173  #endif  #endif
174    
175  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
176  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), and without the interface to the DFA matcher (NODFA). In fact, we
178  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
179  UTF8 support if PCRE is built without it. */  
180    #ifndef SUPPORT_UTF
181  #ifndef SUPPORT_UTF8  #ifndef NOUTF
182  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
183  #endif  #endif
184  #endif  #endif
185    
186    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188    only from one place and is handled differently). I couldn't dream up any way of
189    using a single macro to do this in a generic way, because of the many different
190    argument requirements. We know that at least one of SUPPORT_PCRE8 and
191    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192    use these in the definitions of generic macros.
193    
194    **** Special note about the PCHARSxxx macros: the address of the string to be
195    printed is always given as two arguments: a base address followed by an offset.
196    The base address is cast to the correct data size for 8 or 16 bit data; the
197    offset is in units of this size. If the string were given as base+offset in one
198    argument, the casting might be incorrectly applied. */
199    
200    #ifdef SUPPORT_PCRE8
201    
202    #define PCHARS8(lv, p, offset, len, f) \
203      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
204    
205    #define PCHARSV8(p, offset, len, f) \
206      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
207    
208    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
209      p = read_capture_name8(p, cn8, re)
210    
211    #define STRLEN8(p) ((int)strlen((char *)p))
212    
213    #define SET_PCRE_CALLOUT8(callout) \
214      pcre_callout = callout
215    
216    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
217       pcre_assign_jit_stack(extra, callback, userdata)
218    
219    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
220      re = pcre_compile((char *)pat, options, error, erroffset, tables)
221    
222    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
223        namesptr, cbuffer, size) \
224      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
225        (char *)namesptr, cbuffer, size)
226    
227    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
228      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
229    
230    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
231        offsets, size_offsets, workspace, size_workspace) \
232      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
233        offsets, size_offsets, workspace, size_workspace)
234    
235    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
236        offsets, size_offsets) \
237      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
238        offsets, size_offsets)
239    
240    #define PCRE_FREE_STUDY8(extra) \
241      pcre_free_study(extra)
242    
243    #define PCRE_FREE_SUBSTRING8(substring) \
244      pcre_free_substring(substring)
245    
246    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
247      pcre_free_substring_list(listptr)
248    
249    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
250        getnamesptr, subsptr) \
251      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
252        (char *)getnamesptr, subsptr)
253    
254    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
255      n = pcre_get_stringnumber(re, (char *)ptr)
256    
257    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
258      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
259    
260    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
261      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
262    
263    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
264      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
265    
266    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
267      pcre_printint(re, outfile, debug_lengths)
268    
269    #define PCRE_STUDY8(extra, re, options, error) \
270      extra = pcre_study(re, options, error)
271    
272    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
273      pcre_jit_stack_alloc(startsize, maxsize)
274    
275    #define PCRE_JIT_STACK_FREE8(stack) \
276      pcre_jit_stack_free(stack)
277    
278    #endif /* SUPPORT_PCRE8 */
279    
280    /* -----------------------------------------------------------*/
281    
282    #ifdef SUPPORT_PCRE16
283    
284    #define PCHARS16(lv, p, offset, len, f) \
285      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
286    
287    #define PCHARSV16(p, offset, len, f) \
288      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
289    
290    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
291      p = read_capture_name16(p, cn16, re)
292    
293    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
294    
295    #define SET_PCRE_CALLOUT16(callout) \
296      pcre16_callout = (int (*)(pcre16_callout_block *))callout
297    
298    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
299      pcre16_assign_jit_stack((pcre16_extra *)extra, \
300        (pcre16_jit_callback)callback, userdata)
301    
302    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
304        tables)
305    
306    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
307        namesptr, cbuffer, size) \
308      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
309        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
310    
311    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
312      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
313        (PCRE_UCHAR16 *)cbuffer, size/2)
314    
315    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
316        offsets, size_offsets, workspace, size_workspace) \
317      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
318        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
319        workspace, size_workspace)
320    
321    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
322        offsets, size_offsets) \
323      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
324        len, start_offset, options, offsets, size_offsets)
325    
326    #define PCRE_FREE_STUDY16(extra) \
327      pcre16_free_study((pcre16_extra *)extra)
328    
329    #define PCRE_FREE_SUBSTRING16(substring) \
330      pcre16_free_substring((PCRE_SPTR16)substring)
331    
332    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
333      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
334    
335    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
336        getnamesptr, subsptr) \
337      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
338        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
339    
340    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
341      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
342    
343    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
344      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
345        (PCRE_SPTR16 *)(void*)subsptr)
346    
347    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
348      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
349        (PCRE_SPTR16 **)(void*)listptr)
350    
351    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
353        tables)
354    
355    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
356      pcre16_printint(re, outfile, debug_lengths)
357    
358    #define PCRE_STUDY16(extra, re, options, error) \
359      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
360    
361    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
362      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
363    
364    #define PCRE_JIT_STACK_FREE16(stack) \
365      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
366    
367    #endif /* SUPPORT_PCRE16 */
368    
369    
370    /* ----- Both modes are supported; a runtime test is needed, except for
371    pcre_config(), and the JIT stack functions, when it doesn't matter which
372    version is called. ----- */
373    
374    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
375    
376    #define CHAR_SIZE (use_pcre16? 2:1)
377    
378    #define PCHARS(lv, p, offset, len, f) \
379      if (use_pcre16) \
380        PCHARS16(lv, p, offset, len, f); \
381      else \
382        PCHARS8(lv, p, offset, len, f)
383    
384    #define PCHARSV(p, offset, len, f) \
385      if (use_pcre16) \
386        PCHARSV16(p, offset, len, f); \
387      else \
388        PCHARSV8(p, offset, len, f)
389    
390    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
391      if (use_pcre16) \
392        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
393      else \
394        READ_CAPTURE_NAME8(p, cn8, cn16, re)
395    
396    #define SET_PCRE_CALLOUT(callout) \
397      if (use_pcre16) \
398        SET_PCRE_CALLOUT16(callout); \
399      else \
400        SET_PCRE_CALLOUT8(callout)
401    
402    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
403    
404    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
405      if (use_pcre16) \
406        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
407      else \
408        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
409    
410    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
411      if (use_pcre16) \
412        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
413      else \
414        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
415    
416    #define PCRE_CONFIG pcre_config
417    
418    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
419        namesptr, cbuffer, size) \
420      if (use_pcre16) \
421        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
422          namesptr, cbuffer, size); \
423      else \
424        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
425          namesptr, cbuffer, size)
426    
427    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
428      if (use_pcre16) \
429        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
430      else \
431        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
432    
433    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
434        offsets, size_offsets, workspace, size_workspace) \
435      if (use_pcre16) \
436        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
437          offsets, size_offsets, workspace, size_workspace); \
438      else \
439        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
440          offsets, size_offsets, workspace, size_workspace)
441    
442    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
443        offsets, size_offsets) \
444      if (use_pcre16) \
445        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
446          offsets, size_offsets); \
447      else \
448        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets)
450    
451    #define PCRE_FREE_STUDY(extra) \
452      if (use_pcre16) \
453        PCRE_FREE_STUDY16(extra); \
454      else \
455        PCRE_FREE_STUDY8(extra)
456    
457    #define PCRE_FREE_SUBSTRING(substring) \
458      if (use_pcre16) \
459        PCRE_FREE_SUBSTRING16(substring); \
460      else \
461        PCRE_FREE_SUBSTRING8(substring)
462    
463    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
464      if (use_pcre16) \
465        PCRE_FREE_SUBSTRING_LIST16(listptr); \
466      else \
467        PCRE_FREE_SUBSTRING_LIST8(listptr)
468    
469    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
470        getnamesptr, subsptr) \
471      if (use_pcre16) \
472        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
473          getnamesptr, subsptr); \
474      else \
475        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
476          getnamesptr, subsptr)
477    
478    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
479      if (use_pcre16) \
480        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
481      else \
482        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
483    
484    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
485      if (use_pcre16) \
486        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
487      else \
488        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
489    
490    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
491      if (use_pcre16) \
492        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
493      else \
494        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
495    
496    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
497      (use_pcre16 ? \
498         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
499        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
500    
501    #define PCRE_JIT_STACK_FREE(stack) \
502      if (use_pcre16) \
503        PCRE_JIT_STACK_FREE16(stack); \
504      else \
505        PCRE_JIT_STACK_FREE8(stack)
506    
507    #define PCRE_MAKETABLES \
508      (use_pcre16? pcre16_maketables() : pcre_maketables())
509    
510    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
511      if (use_pcre16) \
512        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
513      else \
514        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
515    
516    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
517      if (use_pcre16) \
518        PCRE_PRINTINT16(re, outfile, debug_lengths); \
519      else \
520        PCRE_PRINTINT8(re, outfile, debug_lengths)
521    
522    #define PCRE_STUDY(extra, re, options, error) \
523      if (use_pcre16) \
524        PCRE_STUDY16(extra, re, options, error); \
525      else \
526        PCRE_STUDY8(extra, re, options, error)
527    
528    /* ----- Only 8-bit mode is supported ----- */
529    
530    #elif defined SUPPORT_PCRE8
531    #define CHAR_SIZE                 1
532    #define PCHARS                    PCHARS8
533    #define PCHARSV                   PCHARSV8
534    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
535    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
536    #define STRLEN                    STRLEN8
537    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
538    #define PCRE_COMPILE              PCRE_COMPILE8
539    #define PCRE_CONFIG               pcre_config
540    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
542    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
543    #define PCRE_EXEC                 PCRE_EXEC8
544    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
545    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
546    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
547    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
548    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
549    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
550    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
551    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
552    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
553    #define PCRE_MAKETABLES           pcre_maketables()
554    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555    #define PCRE_PRINTINT             PCRE_PRINTINT8
556    #define PCRE_STUDY                PCRE_STUDY8
557    
558    /* ----- Only 16-bit mode is supported ----- */
559    
560    #else
561    #define CHAR_SIZE                 2
562    #define PCHARS                    PCHARS16
563    #define PCHARSV                   PCHARSV16
564    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
565    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
566    #define STRLEN                    STRLEN16
567    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
568    #define PCRE_COMPILE              PCRE_COMPILE16
569    #define PCRE_CONFIG               pcre16_config
570    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
572    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
573    #define PCRE_EXEC                 PCRE_EXEC16
574    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
575    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
576    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
577    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
578    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
579    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
580    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
581    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
582    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
583    #define PCRE_MAKETABLES           pcre16_maketables()
584    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585    #define PCRE_PRINTINT             PCRE_PRINTINT16
586    #define PCRE_STUDY                PCRE_STUDY16
587    #endif
588    
589    /* ----- End of mode-specific function call macros ----- */
590    
591    
592  /* Other parameters */  /* Other parameters */
593    
# Line 187  static int debug_lengths; Line 615  static int debug_lengths;
615  static int first_callout;  static int first_callout;
616  static int locale_set = 0;  static int locale_set = 0;
617  static int show_malloc;  static int show_malloc;
618  static int use_utf8;  static int use_utf;
619  static size_t gotten_store;  static size_t gotten_store;
620    static size_t first_gotten_store = 0;
621  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
622    
623  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
624    
625  static int buffer_size = 50000;  static int buffer_size = 50000;
626  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
627  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
628  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
629    
630    /* Another buffer is needed for translation to 16-bit character strings. It
631    will be obtained and extended as required. */
632    
633    #ifdef SUPPORT_PCRE16
634    static int buffer16_size = 0;
635    static pcre_uint16 *buffer16 = NULL;
636    
637    #ifdef SUPPORT_PCRE8
638    
639    /* We need the table of operator lengths that is used for 16-bit compiling, in
640    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
641    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
642    appropriately for the 16-bit world. Just as a safety check, make sure that
643    COMPILE_PCRE16 is *not* set. */
644    
645    #ifdef COMPILE_PCRE16
646    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
647    #endif
648    
649    #if LINK_SIZE == 2
650    #undef LINK_SIZE
651    #define LINK_SIZE 1
652    #elif LINK_SIZE == 3 || LINK_SIZE == 4
653    #undef LINK_SIZE
654    #define LINK_SIZE 2
655    #else
656    #error LINK_SIZE must be either 2, 3, or 4
657    #endif
658    
659    #undef IMM2_SIZE
660    #define IMM2_SIZE 1
661    
662    #endif /* SUPPORT_PCRE8 */
663    
664    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
665    #endif  /* SUPPORT_PCRE16 */
666    
667    /* If we have 8-bit support, default use_pcre16 to false; if there is also
668    16-bit support, it can be changed by an option. If there is no 8-bit support,
669    there must be 16-bit support, so default it to 1. */
670    
671    #ifdef SUPPORT_PCRE8
672    static int use_pcre16 = 0;
673    #else
674    static int use_pcre16 = 1;
675    #endif
676    
677  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
678    
# Line 208  static const char *errtexts[] = { Line 684  static const char *errtexts[] = {
684    "magic number missing",    "magic number missing",
685    "unknown opcode - pattern overwritten?",    "unknown opcode - pattern overwritten?",
686    "no more memory",    "no more memory",
687    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
688    "match limit exceeded",    "match limit exceeded",
689    "callout error code",    "callout error code",
690    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
691    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
692    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
693    "not used - internal error",    "not used - internal error",
694    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 221  static const char *errtexts[] = { Line 697  static const char *errtexts[] = {
697    "backreference condition or recursion test not supported for DFA matching",    "backreference condition or recursion test not supported for DFA matching",
698    "match limit not supported for DFA matching",    "match limit not supported for DFA matching",
699    "workspace size exceeded in DFA matching",    "workspace size exceeded in DFA matching",
700    "too much recursion for DFA matching",    "too much recursion for DFA matching",
701    "recursion limit exceeded",    "recursion limit exceeded",
702    "not used - internal error",    "not used - internal error",
703    "invalid combination of newline options",    "invalid combination of newline options",
704    "bad offset value",    "bad offset value",
705    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
706    "nested recursion at the same subject position"    "nested recursion at the same subject position",
707      "JIT stack limit reached",
708      "pattern compiled in wrong mode: 8-bit/16-bit error"
709  };  };
710    
711    
712  /*************************************************  /*************************************************
713  *         Alternate character tables             *  *         Alternate character tables             *
# Line 243  the L (locale) option also adjusts the t Line 721  the L (locale) option also adjusts the t
721  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
722  only ASCII characters. */  only ASCII characters. */
723    
724  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
725    
726  /* This table is a lower casing table. */  /* This table is a lower casing table. */
727    
# Line 416  graph, print, punct, and cntrl. Other cl Line 894  graph, print, punct, and cntrl. Other cl
894  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
895  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
896    
897  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
898  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
899  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
900  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 579  return sys_errlist[n]; Line 1057  return sys_errlist[n];
1057  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1058    
1059    
1060    /*************************************************
1061    *         JIT memory callback                    *
1062    *************************************************/
1063    
1064    static pcre_jit_stack* jit_callback(void *arg)
1065    {
1066    return (pcre_jit_stack *)arg;
1067    }
1068    
1069    
1070    #if !defined NOUTF || defined SUPPORT_PCRE16
1071    /*************************************************
1072    *            Convert UTF-8 string to value       *
1073    *************************************************/
1074    
1075    /* This function takes one or more bytes that represent a UTF-8 character,
1076    and returns the value of the character.
1077    
1078    Argument:
1079      utf8bytes   a pointer to the byte vector
1080      vptr        a pointer to an int to receive the value
1081    
1082    Returns:      >  0 => the number of bytes consumed
1083                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1084    */
1085    
1086    static int
1087    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1088    {
1089    int c = *utf8bytes++;
1090    int d = c;
1091    int i, j, s;
1092    
1093    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1094      {
1095      if ((d & 0x80) == 0) break;
1096      d <<= 1;
1097      }
1098    
1099    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1100    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1101    
1102    /* i now has a value in the range 1-5 */
1103    
1104    s = 6*i;
1105    d = (c & utf8_table3[i]) << s;
1106    
1107    for (j = 0; j < i; j++)
1108      {
1109      c = *utf8bytes++;
1110      if ((c & 0xc0) != 0x80) return -(j+1);
1111      s -= 6;
1112      d |= (c & 0x3f) << s;
1113      }
1114    
1115    /* Check that encoding was the correct unique one */
1116    
1117    for (j = 0; j < utf8_table1_size; j++)
1118      if (d <= utf8_table1[j]) break;
1119    if (j != i) return -(i+1);
1120    
1121    /* Valid value */
1122    
1123    *vptr = d;
1124    return i+1;
1125    }
1126    #endif /* !NOUTF || SUPPORT_PCRE16 */
1127    
1128    
1129    
1130    #if !defined NOUTF || defined SUPPORT_PCRE16
1131    /*************************************************
1132    *       Convert character value to UTF-8         *
1133    *************************************************/
1134    
1135    /* This function takes an integer value in the range 0 - 0x7fffffff
1136    and encodes it as a UTF-8 character in 1 to 6 bytes.
1137    
1138    Arguments:
1139      cvalue     the character value
1140      utf8bytes  pointer to buffer for result - at least 6 bytes long
1141    
1142    Returns:     number of bytes placed in the buffer
1143    */
1144    
1145    static int
1146    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1147    {
1148    register int i, j;
1149    for (i = 0; i < utf8_table1_size; i++)
1150      if (cvalue <= utf8_table1[i]) break;
1151    utf8bytes += i;
1152    for (j = i; j > 0; j--)
1153     {
1154     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1155     cvalue >>= 6;
1156     }
1157    *utf8bytes = utf8_table2[i] | cvalue;
1158    return i + 1;
1159    }
1160    #endif
1161    
1162    
1163    #ifdef SUPPORT_PCRE16
1164    /*************************************************
1165    *         Convert a string to 16-bit             *
1166    *************************************************/
1167    
1168    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1169    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1170    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but exactly 2
1171    in UTF-16. Higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
1172    result is always left in buffer16.
1173    
1174    Note that this function does not object to surrogate values. This is
1175    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1176    for the purpose of testing that they are correctly faulted.
1177    
1178    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1179    in UTF-8 so that values greater than 255 can be handled.
1180    
1181    Arguments:
1182      data       TRUE if converting a data line; FALSE for a regex
1183      p          points to a byte string
1184      utf        true if UTF-8 (to be converted to UTF-16)
1185      len        number of bytes in the string (excluding trailing zero)
1186    
1187    Returns:     number of 16-bit data items used (excluding trailing zero)
1188                 OR -1 if a UTF-8 string is malformed
1189                 OR -2 if a value > 0x10ffff is encountered
1190                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1191    */
1192    
1193    static int
1194    to16(int data, pcre_uint8 *p, int utf, int len)
1195    {
1196    pcre_uint16 *pp;
1197    
1198    if (buffer16_size < 2*len + 2)
1199      {
1200      if (buffer16 != NULL) free(buffer16);
1201      buffer16_size = 2*len + 2;
1202      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1203      if (buffer16 == NULL)
1204        {
1205        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1206        exit(1);
1207        }
1208      }
1209    
1210    pp = buffer16;
1211    
1212    if (!utf && !data)
1213      {
1214      while (len-- > 0) *pp++ = *p++;
1215      }
1216    
1217    else
1218      {
1219      int c = 0;
1220      while (len > 0)
1221        {
1222        int chlen = utf82ord(p, &c);
1223        if (chlen <= 0) return -1;
1224        if (c > 0x10ffff) return -2;
1225        p += chlen;
1226        len -= chlen;
1227        if (c < 0x10000) *pp++ = c; else
1228          {
1229          if (!utf) return -3;
1230          c -= 0x10000;
1231          *pp++ = 0xD800 | (c >> 10);
1232          *pp++ = 0xDC00 | (c & 0x3ff);
1233          }
1234        }
1235      }
1236    
1237    *pp = 0;
1238    return pp - buffer16;
1239    }
1240    #endif
1241    
1242    
1243  /*************************************************  /*************************************************
# Line 604  Returns: pointer to the start of n Line 1263  Returns: pointer to the start of n
1263                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1264  */  */
1265    
1266  static uschar *  static pcre_uint8 *
1267  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1268  {  {
1269  uschar *here = start;  pcre_uint8 *here = start;
1270    
1271  for (;;)  for (;;)
1272    {    {
1273    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1274    
1275    if (rlen > 1000)    if (rlen > 1000)
1276      {      {
# Line 654  for (;;) Line 1313  for (;;)
1313    else    else
1314      {      {
1315      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1316      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1317      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1318      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319    
1320      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1321        {        {
# Line 687  return NULL; /* Control never gets here Line 1346  return NULL; /* Control never gets here
1346    
1347    
1348    
   
   
   
   
1349  /*************************************************  /*************************************************
1350  *          Read number from string               *  *          Read number from string               *
1351  *************************************************/  *************************************************/
# Line 707  Returns: the unsigned long Line 1362  Returns: the unsigned long
1362  */  */
1363    
1364  static int  static int
1365  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1366  {  {
1367  int result = 0;  int result = 0;
1368  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 718  return(result); Line 1373  return(result);
1373    
1374    
1375    
   
1376  /*************************************************  /*************************************************
1377  *            Convert UTF-8 string to value       *  *             Print one character                *
1378  *************************************************/  *************************************************/
1379    
1380  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
1381    
1382  static int  static int pchar(int c, FILE *f)
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1383  {  {
1384  int c = *utf8bytes++;  if (PRINTOK(c))
1385  int d = c;    {
1386  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1387      return 1;
1388      }
1389    
1390  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1391    {    {
1392    if ((d & 0x80) == 0) break;    if (use_utf)
1393    d <<= 1;      {
1394        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1395        return 6;
1396        }
1397      else
1398        {
1399        if (f != NULL) fprintf(f, "\\x%02x", c);
1400        return 4;
1401        }
1402    }    }
1403    
1404  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1405  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1406           (c <= 0x00000fff)? 7 :
1407           (c <= 0x0000ffff)? 8 :
1408           (c <= 0x000fffff)? 9 : 10;
1409    }
1410    
 /* i now has a value in the range 1-5 */  
1411    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1412    
1413  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1414    {  /*************************************************
1415    c = *utf8bytes++;  *         Print 8-bit character string           *
1416    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1417    
1418  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1419    If handed a NULL file, just counts chars without printing. */
1420    
1421  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1422    if (d <= utf8_table1[j]) break;  {
1423  if (j != i) return -(i+1);  int c = 0;
1424    int yield = 0;
1425    
1426  /* Valid value */  if (length < 0)
1427      length = strlen((char *)p);
1428    
1429  *vptr = d;  while (length-- > 0)
1430  return i+1;    {
1431  }  #if !defined NOUTF
1432      if (use_utf)
1433        {
1434        int rc = utf82ord(p, &c);
1435        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1436          {
1437          length -= rc - 1;
1438          p += rc;
1439          yield += pchar(c, f);
1440          continue;
1441          }
1442        }
1443    #endif
1444      c = *p++;
1445      yield += pchar(c, f);
1446      }
1447    
1448    return yield;
1449    }
1450  #endif  #endif
1451    
1452    
1453    
1454    #ifdef SUPPORT_PCRE16
1455  /*************************************************  /*************************************************
1456  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1457  *************************************************/  *************************************************/
1458    
1459  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1460  {  {
1461  register int i, j;  int len = 0;
1462  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1463    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1464  }  }
1465    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1466    
1467    
1468    #ifdef SUPPORT_PCRE16
1469  /*************************************************  /*************************************************
1470  *             Print character string             *  *           Print 16-bit character string        *
1471  *************************************************/  *************************************************/
1472    
1473  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1474  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1475    
1476  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1477  {  {
 int c = 0;  
1478  int yield = 0;  int yield = 0;
1479    
1480    if (length < 0)
1481      length = strlen16(p);
1482    
1483  while (length-- > 0)  while (length-- > 0)
1484    {    {
1485  #if !defined NOUTF8    int c = *p++ & 0xffff;
1486    if (use_utf8)  #if !defined NOUTF
1487      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1488      {      {
1489      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1490        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1491        {        {
1492        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1493        p += rc;        length--;
1494        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1495        }        }
1496      }      }
1497  #endif  #endif
1498      yield += pchar(c, f);
1499      }
1500    
1501    return yield;
1502    }
1503    #endif  /* SUPPORT_PCRE16 */
1504    
    /* Not UTF-8, or malformed UTF-8  */  
1505    
1506    c = *p++;  
1507    if (PRINTHEX(c))  #ifdef SUPPORT_PCRE8
1508      {  /*************************************************
1509      if (f != NULL) fprintf(f, "%c", c);  *     Read a capture name (8-bit) and check it   *
1510      yield++;  *************************************************/
1511      }  
1512    else  static pcre_uint8 *
1513      {  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1514      if (f != NULL) fprintf(f, "\\x%02x", c);  {
1515      yield += 4;  pcre_uint8 *npp = *pp;
1516      }  while (isalnum(*p)) *npp++ = *p++;
1517    *npp++ = 0;
1518    *npp = 0;
1519    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1520      {
1521      fprintf(outfile, "no parentheses with name \"");
1522      PCHARSV(*pp, 0, -1, outfile);
1523      fprintf(outfile, "\"\n");
1524    }    }
1525    
1526  return yield;  *pp = npp;
1527    return p;
1528  }  }
1529    #endif  /* SUPPORT_PCRE8 */
1530    
1531    
1532    
1533    #ifdef SUPPORT_PCRE16
1534    /*************************************************
1535    *     Read a capture name (16-bit) and check it  *
1536    *************************************************/
1537    
1538    /* Note that the text being read is 8-bit. */
1539    
1540    static pcre_uint8 *
1541    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1542    {
1543    pcre_uint16 *npp = *pp;
1544    while (isalnum(*p)) *npp++ = *p++;
1545    *npp++ = 0;
1546    *npp = 0;
1547    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1548      {
1549      fprintf(outfile, "no parentheses with name \"");
1550      PCHARSV(*pp, 0, -1, outfile);
1551      fprintf(outfile, "\"\n");
1552      }
1553    *pp = npp;
1554    return p;
1555    }
1556    #endif  /* SUPPORT_PCRE16 */
1557    
1558    
1559    
# Line 905  if (callout_extra) Line 1582  if (callout_extra)
1582      else      else
1583        {        {
1584        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1585        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1586          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1587        fprintf(f, "\n");        fprintf(f, "\n");
1588        }        }
# Line 918  printed lengths of the substrings. */ Line 1595  printed lengths of the substrings. */
1595    
1596  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1597    
1598  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1599  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1600    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1601    
1602  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1603    
1604  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1605    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1606    
1607  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 961  fprintf(outfile, "%.*s", (cb->next_item_ Line 1638  fprintf(outfile, "%.*s", (cb->next_item_
1638  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1639  first_callout = 0;  first_callout = 0;
1640    
1641  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
1642    {    {
1643    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
1644      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
1645    last_callout_mark = cb->mark;    else
1646    }      {
1647        fprintf(outfile, "Latest Mark: ");
1648        PCHARSV(cb->mark, 0, -1, outfile);
1649        putc('\n', outfile);
1650        }
1651      last_callout_mark = cb->mark;
1652      }
1653    
1654  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1655    {    {
# Line 987  return (cb->callout_number != callout_fa Line 1670  return (cb->callout_number != callout_fa
1670  *            Local malloc functions              *  *            Local malloc functions              *
1671  *************************************************/  *************************************************/
1672    
1673  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1674  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1675    show_malloc variable is set only during matching. */
1676    
1677  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1678  {  {
1679  void *block = malloc(size);  void *block = malloc(size);
1680  gotten_store = size;  gotten_store = size;
1681    if (first_gotten_store == 0) first_gotten_store = size;
1682  if (show_malloc)  if (show_malloc)
1683    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1684  return block;  return block;
# Line 1006  if (show_malloc) Line 1691  if (show_malloc)
1691  free(block);  free(block);
1692  }  }
1693    
   
1694  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1695    
1696  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 1029  free(block); Line 1713  free(block);
1713  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1714  *************************************************/  *************************************************/
1715    
1716  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1717    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1718    value, but the code is defensive.
1719    
1720  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  Arguments:
1721      re        compiled regex
1722      study     study data
1723      option    PCRE_INFO_xxx option
1724      ptr       where to put the data
1725    
1726    Returns:    0 when OK, < 0 on error
1727    */
1728    
1729    static int
1730    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1731  {  {
1732  int rc;  int rc;
1733  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1734    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1735    #ifdef SUPPORT_PCRE16
1736      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1737    #else
1738      rc = PCRE_ERROR_BADMODE;
1739    #endif
1740    else
1741    #ifdef SUPPORT_PCRE8
1742      rc = pcre_fullinfo(re, study, option, ptr);
1743    #else
1744      rc = PCRE_ERROR_BADMODE;
1745    #endif
1746    
1747    if (rc < 0)
1748      {
1749      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1750        use_pcre16? "16" : "", option);
1751      if (rc == PCRE_ERROR_BADMODE)
1752        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1753          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1754      }
1755    
1756    return rc;
1757  }  }
1758    
1759    
1760    
1761  /*************************************************  /*************************************************
1762  *         Byte flipping function                 *  *             Swap byte functions                *
1763  *************************************************/  *************************************************/
1764    
1765  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1766  byteflip(unsigned long int value, int n)  value, respectively.
1767    
1768    Arguments:
1769      value        any number
1770    
1771    Returns:       the byte swapped value
1772    */
1773    
1774    static pcre_uint32
1775    swap_uint32(pcre_uint32 value)
1776  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1777  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1778         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1779         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1780         ((value & 0xff000000) >> 24);         (value >> 24);
1781  }  }
1782    
1783    static pcre_uint16
1784    swap_uint16(pcre_uint16 value)
1785    {
1786    return (value >> 8) | (value << 8);
1787    }
1788    
1789    
1790    
1791    /*************************************************
1792    *        Flip bytes in a compiled pattern        *
1793    *************************************************/
1794    
1795    /* This function is called if the 'F' option was present on a pattern that is
1796    to be written to a file. We flip the bytes of all the integer fields in the
1797    regex data block and the study block. In 16-bit mode this also flips relevant
1798    bytes in the pattern itself. This is to make it possible to test PCRE's
1799    ability to reload byte-flipped patterns, e.g. those compiled on a different
1800    architecture. */
1801    
1802    static void
1803    regexflip(pcre *ere, pcre_extra *extra)
1804    {
1805    REAL_PCRE *re = (REAL_PCRE *)ere;
1806    #ifdef SUPPORT_PCRE16
1807    int op;
1808    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1809    int length = re->name_count * re->name_entry_size;
1810    #ifdef SUPPORT_UTF
1811    BOOL utf = (re->options & PCRE_UTF16) != 0;
1812    BOOL utf16_char = FALSE;
1813    #endif /* SUPPORT_UTF */
1814    #endif /* SUPPORT_PCRE16 */
1815    
1816    /* Always flip the bytes in the main data block and study blocks. */
1817    
1818    re->magic_number = REVERSED_MAGIC_NUMBER;
1819    re->size = swap_uint32(re->size);
1820    re->options = swap_uint32(re->options);
1821    re->flags = swap_uint16(re->flags);
1822    re->top_bracket = swap_uint16(re->top_bracket);
1823    re->top_backref = swap_uint16(re->top_backref);
1824    re->first_char = swap_uint16(re->first_char);
1825    re->req_char = swap_uint16(re->req_char);
1826    re->name_table_offset = swap_uint16(re->name_table_offset);
1827    re->name_entry_size = swap_uint16(re->name_entry_size);
1828    re->name_count = swap_uint16(re->name_count);
1829    
1830    if (extra != NULL)
1831      {
1832      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1833      rsd->size = swap_uint32(rsd->size);
1834      rsd->flags = swap_uint32(rsd->flags);
1835      rsd->minlength = swap_uint32(rsd->minlength);
1836      }
1837    
1838    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1839    in the name table, if present, and then in the pattern itself. */
1840    
1841    #ifdef SUPPORT_PCRE16
1842    if (!use_pcre16) return;
1843    
1844    while(TRUE)
1845      {
1846      /* Swap previous characters. */
1847      while (length-- > 0)
1848        {
1849        *ptr = swap_uint16(*ptr);
1850        ptr++;
1851        }
1852    #ifdef SUPPORT_UTF
1853      if (utf16_char)
1854        {
1855        if ((ptr[-1] & 0xfc00) == 0xd800)
1856          {
1857          /* We know that there is only one extra character in UTF-16. */
1858          *ptr = swap_uint16(*ptr);
1859          ptr++;
1860          }
1861        }
1862      utf16_char = FALSE;
1863    #endif /* SUPPORT_UTF */
1864    
1865      /* Get next opcode. */
1866    
1867      length = 0;
1868      op = *ptr;
1869      *ptr++ = swap_uint16(op);
1870    
1871      switch (op)
1872        {
1873        case OP_END:
1874        return;
1875    
1876    #ifdef SUPPORT_UTF
1877        case OP_CHAR:
1878        case OP_CHARI:
1879        case OP_NOT:
1880        case OP_NOTI:
1881        case OP_STAR:
1882        case OP_MINSTAR:
1883        case OP_PLUS:
1884        case OP_MINPLUS:
1885        case OP_QUERY:
1886        case OP_MINQUERY:
1887        case OP_UPTO:
1888        case OP_MINUPTO:
1889        case OP_EXACT:
1890        case OP_POSSTAR:
1891        case OP_POSPLUS:
1892        case OP_POSQUERY:
1893        case OP_POSUPTO:
1894        case OP_STARI:
1895        case OP_MINSTARI:
1896        case OP_PLUSI:
1897        case OP_MINPLUSI:
1898        case OP_QUERYI:
1899        case OP_MINQUERYI:
1900        case OP_UPTOI:
1901        case OP_MINUPTOI:
1902        case OP_EXACTI:
1903        case OP_POSSTARI:
1904        case OP_POSPLUSI:
1905        case OP_POSQUERYI:
1906        case OP_POSUPTOI:
1907        case OP_NOTSTAR:
1908        case OP_NOTMINSTAR:
1909        case OP_NOTPLUS:
1910        case OP_NOTMINPLUS:
1911        case OP_NOTQUERY:
1912        case OP_NOTMINQUERY:
1913        case OP_NOTUPTO:
1914        case OP_NOTMINUPTO:
1915        case OP_NOTEXACT:
1916        case OP_NOTPOSSTAR:
1917        case OP_NOTPOSPLUS:
1918        case OP_NOTPOSQUERY:
1919        case OP_NOTPOSUPTO:
1920        case OP_NOTSTARI:
1921        case OP_NOTMINSTARI:
1922        case OP_NOTPLUSI:
1923        case OP_NOTMINPLUSI:
1924        case OP_NOTQUERYI:
1925        case OP_NOTMINQUERYI:
1926        case OP_NOTUPTOI:
1927        case OP_NOTMINUPTOI:
1928        case OP_NOTEXACTI:
1929        case OP_NOTPOSSTARI:
1930        case OP_NOTPOSPLUSI:
1931        case OP_NOTPOSQUERYI:
1932        case OP_NOTPOSUPTOI:
1933        if (utf) utf16_char = TRUE;
1934    #endif
1935        /* Fall through. */
1936    
1937        default:
1938        length = OP_lengths16[op] - 1;
1939        break;
1940    
1941        case OP_CLASS:
1942        case OP_NCLASS:
1943        /* Skip the character bit map. */
1944        ptr += 32/sizeof(pcre_uint16);
1945        length = 0;
1946        break;
1947    
1948        case OP_XCLASS:
1949        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1950        if (LINK_SIZE > 1)
1951          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1952            - (1 + LINK_SIZE + 1));
1953        else
1954          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1955    
1956        /* Reverse the size of the XCLASS instance. */
1957        *ptr = swap_uint16(*ptr);
1958        ptr++;
1959        if (LINK_SIZE > 1)
1960          {
1961          *ptr = swap_uint16(*ptr);
1962          ptr++;
1963          }
1964    
1965        op = *ptr;
1966        *ptr = swap_uint16(op);
1967        ptr++;
1968        if ((op & XCL_MAP) != 0)
1969          {
1970          /* Skip the character bit map. */
1971          ptr += 32/sizeof(pcre_uint16);
1972          length -= 32/sizeof(pcre_uint16);
1973          }
1974        break;
1975        }
1976      }
1977    /* Control should never reach here in 16 bit mode. */
1978    #endif /* SUPPORT_PCRE16 */
1979    }
1980    
1981    
1982    
# Line 1062  return ((value & 0x000000ff) << 24) | Line 1985  return ((value & 0x000000ff) << 24) |
1985  *************************************************/  *************************************************/
1986    
1987  static int  static int
1988  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1989    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1990    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1991  {  {
# Line 1077  for (;;) Line 2000  for (;;)
2000    {    {
2001    *limit = mid;    *limit = mid;
2002    
2003    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2004      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2005    
2006    if (count == errnumber)    if (count == errnumber)
# Line 1122  Returns: < 0, = 0, or > 0, according Line 2045  Returns: < 0, = 0, or > 0, according
2045  */  */
2046    
2047  static int  static int
2048  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2049  {  {
2050  while (n--)  while (n--)
2051    {    {
# Line 1149  Returns: appropriate PCRE_NEWLINE_x Line 2072  Returns: appropriate PCRE_NEWLINE_x
2072  */  */
2073    
2074  static int  static int
2075  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2076  {  {
2077  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2078  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2079  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2080  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2081  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2082  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2083  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2084  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2085  return 0;  return 0;
2086  }  }
# Line 1179  printf("If input is a terminal, readline Line 2102  printf("If input is a terminal, readline
2102  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2103  #endif  #endif
2104  printf("\nOptions:\n");  printf("\nOptions:\n");
2105  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2106    printf("  -16      use the 16-bit library\n");
2107    #endif
2108    printf("  -b       show compiled code\n");
2109  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2110    printf("  -C arg   show a specific compile-time option\n");
2111    printf("           and exit with its value. The arg can be:\n");
2112    printf("     linksize     internal link size [2, 3, 4]\n");
2113    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2114    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2115    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2116    printf("     ucp          Unicode Properties supported [0, 1]\n");
2117    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2118    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2119  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2120  #if !defined NODFA  #if !defined NODFA
2121  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1195  printf(" -p use POSIX interface\n Line 2130  printf(" -p use POSIX interface\n
2130  #endif  #endif
2131  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2132  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2133  printf("  -s       force each pattern to be studied\n"  printf("  -s       force each pattern to be studied at basic level\n"
2134           "  -s+      force each pattern to be studied, using JIT if available\n"
2135           "  -s++     ditto, verifying when JIT was actually used\n"
2136         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2137  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2138  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1215  options, followed by a set of test data, Line 2152  options, followed by a set of test data,
2152  int main(int argc, char **argv)  int main(int argc, char **argv)
2153  {  {
2154  FILE *infile = stdin;  FILE *infile = stdin;
2155    const char *version;
2156  int options = 0;  int options = 0;
2157  int study_options = 0;  int study_options = 0;
2158  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1223  int timeit = 0; Line 2161  int timeit = 0;
2161  int timeitm = 0;  int timeitm = 0;
2162  int showinfo = 0;  int showinfo = 0;
2163  int showstore = 0;  int showstore = 0;
2164  int force_study = 0;  int force_study = -1;
2165    int force_study_options = 0;
2166  int quiet = 0;  int quiet = 0;
2167  int size_offsets = 45;  int size_offsets = 45;
2168  int size_offsets_max;  int size_offsets_max;
# Line 1234  int posix = 0; Line 2173  int posix = 0;
2173  int debug = 0;  int debug = 0;
2174  int done = 0;  int done = 0;
2175  int all_use_dfa = 0;  int all_use_dfa = 0;
2176    int verify_jit = 0;
2177  int yield = 0;  int yield = 0;
2178  int stack_size;  int stack_size;
2179    
2180  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
2181    
2182  uschar copynames[1024];  /* These vectors store, end-to-end, a list of zero-terminated captured
2183  uschar getnames[1024];  substring names, each list itself being terminated by an empty name. Assume
2184    that 1024 is plenty long enough for the few names we'll be testing. It is
2185  uschar *copynamesptr;  easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2186  uschar *getnamesptr;  for the actual memory, to ensure alignment. */
2187    
2188    pcre_uint16 copynames[1024];
2189    pcre_uint16 getnames[1024];
2190    
2191    #ifdef SUPPORT_PCRE16
2192    pcre_uint16 *cn16ptr;
2193    pcre_uint16 *gn16ptr;
2194    #endif
2195    
2196  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2197  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2198    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2199    pcre_uint8 *cn8ptr;
2200    pcre_uint8 *gn8ptr;
2201    #endif
2202    
2203  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2204  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2205  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2206    
2207    buffer = (pcre_uint8 *)malloc(buffer_size);
2208    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2209    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2210    
2211  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2212    
# Line 1266  it set 0x8000, but then I was advised th Line 2221  it set 0x8000, but then I was advised th
2221  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2222  #endif  #endif
2223    
2224    /* Get the version number: both pcre_version() and pcre16_version() give the
2225    same answer. We just need to ensure that we call one that is available. */
2226    
2227    #ifdef SUPPORT_PCRE8
2228    version = pcre_version();
2229    #else
2230    version = pcre16_version();
2231    #endif
2232    
2233  /* Scan options */  /* Scan options */
2234    
2235  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2236    {    {
2237    unsigned char *endptr;    pcre_uint8 *endptr;
2238      char *arg = argv[op];
2239    
2240      if (strcmp(arg, "-m") == 0) showstore = 1;
2241      else if (strcmp(arg, "-s") == 0) force_study = 0;
2242    
2243      else if (strncmp(arg, "-s+", 3) == 0)
2244        {
2245        arg += 3;
2246        if (*arg == '+') { arg++; verify_jit = TRUE; }
2247    
2248    if (strcmp(argv[op], "-m") == 0) showstore = 1;      if (*arg != 0) goto BAD_ARG;
2249    else if (strcmp(argv[op], "-s") == 0) force_study = 1;  
2250    else if (strcmp(argv[op], "-q") == 0) quiet = 1;      force_study = 1;
2251    else if (strcmp(argv[op], "-b") == 0) debug = 1;      force_study_options = PCRE_STUDY_JIT_COMPILE
2252    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;                          | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
2253    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;                          | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
2254    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;      }
2255      else if (strcmp(arg, "-16") == 0)
2256        {
2257    #ifdef SUPPORT_PCRE16
2258        use_pcre16 = 1;
2259    #else
2260        printf("** This version of PCRE was built without 16-bit support\n");
2261        exit(1);
2262    #endif
2263        }
2264      else if (strcmp(arg, "-q") == 0) quiet = 1;
2265      else if (strcmp(arg, "-b") == 0) debug = 1;
2266      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2267      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2268      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2269  #if !defined NODFA  #if !defined NODFA
2270    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2271  #endif  #endif
2272    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2273        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2274          *endptr == 0))          *endptr == 0))
2275      {      {
2276      op++;      op++;
2277      argc--;      argc--;
2278      }      }
2279    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2280      {      {
2281      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2282      int temp;      int temp;
2283      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2284                       *endptr == 0))                       *endptr == 0))
2285        {        {
2286        timeitm = temp;        timeitm = temp;
# Line 1303  while (argc > 1 && argv[op][0] == '-') Line 2290  while (argc > 1 && argv[op][0] == '-')
2290      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2291      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2292      }      }
2293    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2294        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2295          *endptr == 0))          *endptr == 0))
2296      {      {
2297  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
# Line 1326  while (argc > 1 && argv[op][0] == '-') Line 2313  while (argc > 1 && argv[op][0] == '-')
2313  #endif  #endif
2314      }      }
2315  #if !defined NOPOSIX  #if !defined NOPOSIX
2316    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2317  #endif  #endif
2318    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2319      {      {
2320      int rc;      int rc;
2321      unsigned long int lrc;      unsigned long int lrc;
2322      printf("PCRE version %s\n", pcre_version());  
2323        if (argc > 2)
2324          {
2325          if (strcmp(argv[op + 1], "linksize") == 0)
2326            {
2327            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2328            printf("%d\n", rc);
2329            yield = rc;
2330            goto EXIT;
2331            }
2332          if (strcmp(argv[op + 1], "pcre8") == 0)
2333            {
2334    #ifdef SUPPORT_PCRE8
2335            printf("1\n");
2336            yield = 1;
2337    #else
2338            printf("0\n");
2339            yield = 0;
2340    #endif
2341            goto EXIT;
2342            }
2343          if (strcmp(argv[op + 1], "pcre16") == 0)
2344            {
2345    #ifdef SUPPORT_PCRE16
2346            printf("1\n");
2347            yield = 1;
2348    #else
2349            printf("0\n");
2350            yield = 0;
2351    #endif
2352            goto EXIT;
2353            }
2354          if (strcmp(argv[op + 1], "utf") == 0)
2355            {
2356    #ifdef SUPPORT_PCRE8
2357            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2358            printf("%d\n", rc);
2359            yield = rc;
2360    #else
2361            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2362            printf("%d\n", rc);
2363            yield = rc;
2364    #endif
2365            goto EXIT;
2366            }
2367          if (strcmp(argv[op + 1], "ucp") == 0)
2368            {
2369            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2370            printf("%d\n", rc);
2371            yield = rc;
2372            goto EXIT;
2373            }
2374          if (strcmp(argv[op + 1], "jit") == 0)
2375            {
2376            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2377            printf("%d\n", rc);
2378            yield = rc;
2379            goto EXIT;
2380            }
2381          if (strcmp(argv[op + 1], "newline") == 0)
2382            {
2383            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2384            /* Note that these values are always the ASCII values, even
2385            in EBCDIC environments. CR is 13 and NL is 10. */
2386            printf("%s\n", (rc == 13)? "CR" :
2387              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2388              (rc == -2)? "ANYCRLF" :
2389              (rc == -1)? "ANY" : "???");
2390            goto EXIT;
2391            }
2392          printf("Unknown -C option: %s\n", argv[op + 1]);
2393          goto EXIT;
2394          }
2395    
2396        printf("PCRE version %s\n", version);
2397      printf("Compiled with\n");      printf("Compiled with\n");
2398    
2399    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2400    are set, either both UTFs are supported or both are not supported. */
2401    
2402    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2403        printf("  8-bit and 16-bit support\n");
2404        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2405        if (rc)
2406          printf("  UTF-8 and UTF-16 support\n");
2407        else
2408          printf("  No UTF-8 or UTF-16 support\n");
2409    #elif defined SUPPORT_PCRE8
2410        printf("  8-bit support only\n");
2411      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2412      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2413      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2414        printf("  16-bit support only\n");
2415        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2416        printf("  %sUTF-16 support\n", rc? "" : "No ");
2417    #endif
2418    
2419        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2420      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2421      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2422        if (rc)
2423          {
2424          const char *arch;
2425          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2426          printf("  Just-in-time compiler support: %s\n", arch);
2427          }
2428        else
2429          printf("  No just-in-time compiler support\n");
2430        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2431      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2432      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2433      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2434        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2435        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2436        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2437      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2438      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2439                                       "all Unicode newlines");                                       "all Unicode newlines");
2440      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2441      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2442      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2443      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2444      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2445      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2446      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2447      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2448      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2449      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2450        if (showstore)
2451          {
2452          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2453          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2454          }
2455        printf("\n");
2456      goto EXIT;      goto EXIT;
2457      }      }
2458    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2459             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2460      {      {
2461      usage();      usage();
2462      goto EXIT;      goto EXIT;
2463      }      }
2464    else    else
2465      {      {
2466      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2467        printf("** Unknown or malformed option %s\n", arg);
2468      usage();      usage();
2469      yield = 1;      yield = 1;
2470      goto EXIT;      goto EXIT;
# Line 1415  if (argc > 2) Line 2511  if (argc > 2)
2511    
2512  /* Set alternative malloc function */  /* Set alternative malloc function */
2513    
2514    #ifdef SUPPORT_PCRE8
2515  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2516  pcre_free = new_free;  pcre_free = new_free;
2517  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2518  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2519    #endif
2520    
2521    #ifdef SUPPORT_PCRE16
2522    pcre16_malloc = new_malloc;
2523    pcre16_free = new_free;
2524    pcre16_stack_malloc = stack_malloc;
2525    pcre16_stack_free = stack_free;
2526    #endif
2527    
2528  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2529    
2530  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2531    
2532  /* Main loop */  /* Main loop */
2533    
# Line 1437  while (!done) Line 2542  while (!done)
2542  #endif  #endif
2543    
2544    const char *error;    const char *error;
2545    unsigned char *markptr;    pcre_uint8 *markptr;
2546    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2547    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2548    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2549      unsigned long int get_options;
2550    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2551    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2552    int do_allcaps = 0;    int do_allcaps = 0;
2553    int do_mark = 0;    int do_mark = 0;
2554    int do_study = 0;    int do_study = 0;
2555    int no_force_study = 0;    int no_force_study = 0;
2556    int do_debug = debug;    int do_debug = debug;
2557    int do_G = 0;    int do_G = 0;
2558    int do_g = 0;    int do_g = 0;
# Line 1456  while (!done) Line 2562  while (!done)
2562    int do_flip = 0;    int do_flip = 0;
2563    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2564    
2565    use_utf8 = 0;    use_utf = 0;
2566    debug_lengths = 1;    debug_lengths = 1;
2567    
2568    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1471  while (!done) Line 2577  while (!done)
2577    
2578    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2579      {      {
2580      unsigned long int magic, get_options;      pcre_uint32 magic;
2581      uschar sbuf[8];      pcre_uint8 sbuf[8];
2582      FILE *f;      FILE *f;
2583    
2584      p++;      p++;
2585        if (*p == '!')
2586          {
2587          do_debug = TRUE;
2588          do_showinfo = TRUE;
2589          p++;
2590          }
2591    
2592      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2593      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2594      *pp = 0;      *pp = 0;
# Line 1487  while (!done) Line 2600  while (!done)
2600        continue;        continue;
2601        }        }
2602    
2603        first_gotten_store = 0;
2604      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2605    
2606      true_size =      true_size =
# Line 1494  while (!done) Line 2608  while (!done)
2608      true_study_size =      true_study_size =
2609        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2610    
2611      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2612      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2613    
2614      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2615    
2616      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2617      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2618        {        {
2619        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2620          {          {
2621          do_flip = 1;          do_flip = 1;
2622          }          }
# Line 1514  while (!done) Line 2628  while (!done)
2628          }          }
2629        }        }
2630    
2631        /* We hide the byte-invert info for little and big endian tests. */
2632      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2633        do_flip? " (byte-inverted)" : "", p);        do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
   
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
2634    
2635      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
2636    
# Line 1538  while (!done) Line 2648  while (!done)
2648          {          {
2649          FAIL_READ:          FAIL_READ:
2650          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2651          if (extra != NULL) new_free(extra);          if (extra != NULL)
2652              {
2653              PCRE_FREE_STUDY(extra);
2654              }
2655          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2656          fclose(f);          fclose(f);
2657          continue;          continue;
# Line 1548  while (!done) Line 2661  while (!done)
2661        }        }
2662      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2663    
2664        /* Flip the necessary bytes. */
2665        if (do_flip)
2666          {
2667          int rc;
2668          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2669          if (rc == PCRE_ERROR_BADMODE)
2670            {
2671            /* Simulate the result of the function call below. */
2672            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2673              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2674            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2675              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2676            continue;
2677            }
2678          }
2679    
2680        /* Need to know whether the pattern uses UTF-8, for printing data strings. */
2681    
2682        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2683        use_utf = (get_options & PCRE_UTF8) != 0;
2684    
2685      fclose(f);      fclose(f);
2686      goto SHOW_INFO;      goto SHOW_INFO;
2687      }      }
2688    
2689    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2690    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2691    
2692    delimiter = *p++;    delimiter = *p++;
2693    
# Line 1619  while (!done) Line 2753  while (!done)
2753        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2754    
2755        case '+':        case '+':
2756        if (do_showrest) do_showcaprest = 1; else do_showrest = 1;        if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2757        break;        break;
2758    
2759        case '=': do_allcaps = 1; break;        case '=': do_allcaps = 1; break;
2760        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2761        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2762        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1640  while (!done) Line 2774  while (!done)
2774        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2775  #endif  #endif
2776    
2777        case 'S':        case 'S':
2778        if (do_study == 0) do_study = 1; else        if (do_study == 0)
2779            {
2780            do_study = 1;
2781            if (*pp == '+')
2782              {
2783              if (*(++pp) == '+')
2784                {
2785                verify_jit = TRUE;
2786                pp++;
2787                }
2788              study_options |= PCRE_STUDY_JIT_COMPILE
2789                            | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE
2790                            | PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE;
2791              }
2792            }
2793          else
2794          {          {
2795          do_study = 0;          do_study = 0;
2796          no_force_study = 1;          no_force_study = 1;
2797          }          }
2798        break;        break;
2799    
2800        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
# Line 1653  while (!done) Line 2802  while (!done)
2802        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2803        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2804        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2805        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2806        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2807    
2808        case 'T':        case 'T':
# Line 1687  while (!done) Line 2836  while (!done)
2836          goto SKIP_DATA;          goto SKIP_DATA;
2837          }          }
2838        locale_set = 1;        locale_set = 1;
2839        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2840        pp = ppp;        pp = ppp;
2841        break;        break;
2842    
# Line 1700  while (!done) Line 2849  while (!done)
2849    
2850        case '<':        case '<':
2851          {          {
2852          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2853            {            {
2854            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2855            pp += 3;            pp += 3;
# Line 1728  while (!done) Line 2877  while (!done)
2877    
2878    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2879    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2880    local character tables. */    local character tables. Neither does it have 16-bit support. */
2881    
2882  #if !defined NOPOSIX  #if !defined NOPOSIX
2883    if (posix || do_posix)    if (posix || do_posix)
# Line 1744  while (!done) Line 2893  while (!done)
2893      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2894      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2895    
2896        first_gotten_store = 0;
2897      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2898    
2899      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1763  while (!done) Line 2913  while (!done)
2913  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2914    
2915      {      {
2916      unsigned long int get_options;      /* In 16-bit mode, convert the input. */
2917    
2918    #ifdef SUPPORT_PCRE16
2919        if (use_pcre16)
2920          {
2921          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2922            {
2923            case -1:
2924            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2925              "converted to UTF-16\n");
2926            goto SKIP_DATA;
2927    
2928            case -2:
2929            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2930              "cannot be converted to UTF-16\n");
2931            goto SKIP_DATA;
2932    
2933            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2934            fprintf(outfile, "**Failed: character value greater than 0xffff "
2935              "cannot be converted to 16-bit in non-UTF mode\n");
2936            goto SKIP_DATA;
2937    
2938            default:
2939            break;
2940            }
2941          p = (pcre_uint8 *)buffer16;
2942          }
2943    #endif
2944    
2945        /* Compile many times when timing */
2946    
2947      if (timeit > 0)      if (timeit > 0)
2948        {        {
# Line 1772  while (!done) Line 2951  while (!done)
2951        clock_t start_time = clock();        clock_t start_time = clock();
2952        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2953          {          {
2954          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2955          if (re != NULL) free(re);          if (re != NULL) free(re);
2956          }          }
2957        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1781  while (!done) Line 2960  while (!done)
2960            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2961        }        }
2962    
2963      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2964        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2965    
2966      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2967      if non-interactive. */      if non-interactive. */
# Line 1812  while (!done) Line 2992  while (!done)
2992      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
2993      lines. */      lines. */
2994    
2995      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2996      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
2997        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2998    
2999      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3000      and remember the store that was got. */      and remember the store that was got. */
3001    
3002      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3003      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3004    
3005        /* Output code size information if requested */
3006    
3007        if (log_store)
3008          fprintf(outfile, "Memory allocation (code space): %d\n",
3009            (int)(first_gotten_store -
3010                  sizeof(REAL_PCRE) -
3011                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3012    
3013      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3014      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
3015      suppresses the effect of /S (used for a few test patterns where studying is      suppresses the effect of /S (used for a few test patterns where studying is
3016      never sensible). */      never sensible). */
3017    
3018      if (do_study || (force_study && !no_force_study))      if (do_study || (force_study >= 0 && !no_force_study))
3019        {        {
3020        if (timeit > 0)        if (timeit > 0)
3021          {          {
# Line 1844  while (!done) Line 3023  while (!done)
3023          clock_t time_taken;          clock_t time_taken;
3024          clock_t start_time = clock();          clock_t start_time = clock();
3025          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3026            extra = pcre_study(re, study_options, &error);            {
3027              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3028              }
3029          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3030          if (extra != NULL) free(extra);          if (extra != NULL)
3031              {
3032              PCRE_FREE_STUDY(extra);
3033              }
3034          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3035            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3036              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3037          }          }
3038        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3039        if (error != NULL)        if (error != NULL)
3040          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3041        else if (extra != NULL)        else if (extra != NULL)
3042            {
3043          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3044            if (log_store)
3045              {
3046              size_t jitsize;
3047              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3048                  jitsize != 0)
3049                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3050              }
3051            }
3052        }        }
3053    
3054      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1871  while (!done) Line 3064  while (!done)
3064        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3065        }        }
3066    
3067      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3068    
3069      SHOW_INFO:      SHOW_INFO:
3070    
3071      if (do_debug)      if (do_debug)
3072        {        {
3073        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3074        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3075        }        }
3076    
3077      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1923  while (!done) Line 3079  while (!done)
3079      if (do_showinfo)      if (do_showinfo)
3080        {        {
3081        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3082        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3083          hascrorlf;          hascrorlf;
3084        int nameentrysize, namecount;        int nameentrysize, namecount;
3085        const uschar *nametable;        const pcre_uint8 *nametable;
3086    
3087        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3088        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3089        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3090        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3091        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3092        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3093        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3094        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3095        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3096        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3097        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3098              != 0)
3099  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3100    
3101        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3102          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1976  while (!done) Line 3111  while (!done)
3111          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3112          while (namecount-- > 0)          while (namecount-- > 0)
3113            {            {
3114            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3115              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3116              GET2(nametable, 0));  #else
3117              int imm2_size = IMM2_SIZE;
3118    #endif
3119              int length = (int)STRLEN(nametable + imm2_size);
3120              fprintf(outfile, "  ");
3121              PCHARSV(nametable, imm2_size, length, outfile);
3122              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3123    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3124              fprintf(outfile, "%3d\n", use_pcre16?
3125                 (int)(((PCRE_SPTR16)nametable)[0])
3126                :((int)nametable[0] << 8) | (int)nametable[1]);
3127              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3128    #else
3129              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3130    #ifdef SUPPORT_PCRE8
3131            nametable += nameentrysize;            nametable += nameentrysize;
3132    #else
3133              nametable += nameentrysize * 2;
3134    #endif
3135    #endif
3136            }            }
3137          }          }
3138    
3139        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3140        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3141    
3142        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3143        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3144    
3145        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3146          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 2003  while (!done) Line 3156  while (!done)
3156            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3157            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3158            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3159            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3160            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3161            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3162            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3163            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3164    
# Line 2047  while (!done) Line 3200  while (!done)
3200          }          }
3201        else        else
3202          {          {
3203          int ch = first_char & 255;          const char *caseless =
3204          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3205            "" : " (caseless)";            "" : " (caseless)";
3206          if (PRINTHEX(ch))  
3207            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3208              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3209          else          else
3210            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3211              fprintf(outfile, "First char = ");
3212              pchar(first_char, outfile);
3213              fprintf(outfile, "%s\n", caseless);
3214              }
3215          }          }
3216    
3217        if (need_char < 0)        if (need_char < 0)
# Line 2062  while (!done) Line 3220  while (!done)
3220          }          }
3221        else        else
3222          {          {
3223          int ch = need_char & 255;          const char *caseless =
3224          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3225            "" : " (caseless)";            "" : " (caseless)";
3226          if (PRINTHEX(ch))  
3227            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3228              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3229          else          else
3230            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3231              fprintf(outfile, "Need char = ");
3232              pchar(need_char, outfile);
3233              fprintf(outfile, "%s\n", caseless);
3234              }
3235          }          }
3236    
3237        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3238        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3239        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3240        flipped.) If study was forced by an external -s, don't show this        flipped.) If study was forced by an external -s, don't show this
3241        information unless -i or -d was also present. This means that, except        information unless -i or -d was also present. This means that, except
3242        when auto-callouts are involved, the output from runs with and without        when auto-callouts are involved, the output from runs with and without
3243        -s should be identical. */        -s should be identical. */
3244    
3245        if (do_study || (force_study && showinfo && !no_force_study))        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3246          {          {
3247          if (extra == NULL)          if (extra == NULL)
3248            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3249          else          else
3250            {            {
3251            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3252            int minlength;            int minlength;
3253    
3254            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3255            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3256    
3257            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3258              {              {
3259              int i;              if (start_bits == NULL)
3260              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3261              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3262                {                {
3263                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3264                  int c = 24;
3265                  fprintf(outfile, "Starting byte set: ");
3266                  for (i = 0; i < 256; i++)
3267                  {                  {
3268                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
3269                    {                    {
3270                    fprintf(outfile, "%c ", i);                    if (c > 75)
3271                    c += 2;                      {
3272                    }                      fprintf(outfile, "\n  ");
3273                  else                      c = 2;
3274                    {                      }
3275                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
3276                    c += 5;                      {
3277                        fprintf(outfile, "%c ", i);
3278                        c += 2;
3279                        }
3280                      else
3281                        {
3282                        fprintf(outfile, "\\x%02x ", i);
3283                        c += 5;
3284                        }
3285                    }                    }
3286                  }                  }
3287                  fprintf(outfile, "\n");
3288                }                }
3289              fprintf(outfile, "\n");              }
3290              }
3291    
3292            /* Show this only if the JIT was set by /S, not by -s. */
3293    
3294            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3295              {
3296              int jit;
3297              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3298                {
3299                if (jit)
3300                  fprintf(outfile, "JIT study was successful\n");
3301                else
3302    #ifdef SUPPORT_JIT
3303                  fprintf(outfile, "JIT study was not successful\n");
3304    #else
3305                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3306    #endif
3307              }              }
3308            }            }
3309          }          }
# Line 2139  while (!done) Line 3322  while (!done)
3322          }          }
3323        else        else
3324          {          {
3325          uschar sbuf[8];          pcre_uint8 sbuf[8];
3326          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3327          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3328          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3329          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3330            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3331          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3332          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3333          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3334          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3335            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3336    
3337          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3338              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2158  while (!done) Line 3342  while (!done)
3342          else          else
3343            {            {
3344            fprintf(outfile, "Compiled pattern written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3345    
3346            /* If there is study data, write it, but verify the writing only            /* If there is study data, write it. */
3347            if the studying was requested by /S, not just by -s. */  
   
3348            if (extra != NULL)            if (extra != NULL)
3349              {              {
3350              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 2177  while (!done) Line 3360  while (!done)
3360          }          }
3361    
3362        new_free(re);        new_free(re);
3363        if (extra != NULL) new_free(extra);        if (extra != NULL)
3364            {
3365            PCRE_FREE_STUDY(extra);
3366            }
3367        if (locale_set)        if (locale_set)
3368          {          {
3369          new_free((void *)tables);          new_free((void *)tables);
# Line 2192  while (!done) Line 3378  while (!done)
3378    
3379    for (;;)    for (;;)
3380      {      {
3381      uschar *q;      pcre_uint8 *q;
3382      uschar *bptr;      pcre_uint8 *bptr;
3383      int *use_offsets = offsets;      int *use_offsets = offsets;
3384      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3385      int callout_data = 0;      int callout_data = 0;
# Line 2208  while (!done) Line 3394  while (!done)
3394      int start_offset_sign = 1;      int start_offset_sign = 1;
3395      int g_notempty = 0;      int g_notempty = 0;
3396      int use_dfa = 0;      int use_dfa = 0;
3397        int jit_was_used = 0;
     options = 0;  
3398    
3399      *copynames = 0;      *copynames = 0;
3400      *getnames = 0;      *getnames = 0;
3401    
3402      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3403      getnamesptr = getnames;      cn16ptr = copynames;
3404        gn16ptr = getnames;
3405    #endif
3406    #ifdef SUPPORT_PCRE8
3407        cn8ptr = copynames8;
3408        gn8ptr = getnames8;
3409    #endif
3410    
3411      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3412      first_callout = 1;      first_callout = 1;
3413      last_callout_mark = NULL;      last_callout_mark = NULL;
3414      callout_extra = 0;      callout_extra = 0;
3415      callout_count = 0;      callout_count = 0;
3416      callout_fail_count = 999999;      callout_fail_count = 999999;
3417      callout_fail_id = -1;      callout_fail_id = -1;
3418      show_malloc = 0;      show_malloc = 0;
3419        options = 0;
3420    
3421      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3422        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2260  while (!done) Line 3452  while (!done)
3452        int i = 0;        int i = 0;
3453        int n = 0;        int n = 0;
3454    
3455        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3456          In non-UTF mode, allow the value of the byte to fall through to later,
3457          where values greater than 127 are turned into UTF-8 when running in
3458          16-bit mode. */
3459    
3460          if (c != '\\')
3461            {
3462            if (use_utf)
3463              {
3464              *q++ = c;
3465              continue;
3466              }
3467            }
3468    
3469          /* Handle backslash escapes */
3470    
3471          else switch ((c = *p++))
3472          {          {
3473          case 'a': c =    7; break;          case 'a': c =    7; break;
3474          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2276  while (!done) Line 3484  while (!done)
3484          c -= '0';          c -= '0';
3485          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3486            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3487          break;          break;
3488    
3489          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3490          if (*p == '{')          if (*p == '{')
3491            {            {
3492            unsigned char *pt = p;            pcre_uint8 *pt = p;
3493            c = 0;            c = 0;
3494            while (isxdigit(*(++pt)))  
3495              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3496              when isxdigit() is a macro that refers to its argument more than
3497              once. This is banned by the C Standard, but apparently happens in at
3498              least one MacOS environment. */
3499    
3500              for (pt++; isxdigit(*pt); pt++)
3501                {
3502                if (++i == 9)
3503                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3504                                   "using only the first eight.\n");
3505                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3506                }
3507            if (*pt == '}')            if (*pt == '}')
3508              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3509              p = pt + 1;              p = pt + 1;
3510              break;              break;
3511              }              }
3512            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3513            }            }
 #endif  
3514    
3515          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3516            allows UTF-8 characters to be constructed byte by byte, and also allows
3517            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3518            Otherwise, pass it down to later code so that it can be turned into
3519            UTF-8 when running in 16-bit mode. */
3520    
3521          c = 0;          c = 0;
3522          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3523            {            {
3524            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3525            p++;            p++;
3526            }            }
3527            if (use_utf)
3528              {
3529              *q++ = c;
3530              continue;
3531              }
3532          break;          break;
3533    
3534          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2364  while (!done) Line 3561  while (!done)
3561            }            }
3562          else if (isalnum(*p))          else if (isalnum(*p))
3563            {            {
3564            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3565            }            }
3566          else if (*p == '+')          else if (*p == '+')
3567            {            {
# Line 2380  while (!done) Line 3570  while (!done)
3570            }            }
3571          else if (*p == '-')          else if (*p == '-')
3572            {            {
3573            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3574            p++;            p++;
3575            }            }
3576          else if (*p == '!')          else if (*p == '!')
# Line 2434  while (!done) Line 3624  while (!done)
3624            }            }
3625          else if (isalnum(*p))          else if (isalnum(*p))
3626            {            {
3627            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3628            while (isalnum(*p)) *npp++ = *p++;            }
3629            *npp++ = 0;          continue;
3630            *npp = 0;  
3631            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3632            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3633              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3634            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3635                && extra->executable_jit != NULL)
3636              {
3637              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3638              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3639              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3640            }            }
3641          continue;          continue;
3642    
# Line 2537  while (!done) Line 3732  while (!done)
3732            }            }
3733          continue;          continue;
3734          }          }
3735        *q++ = c;  
3736          /* We now have a character value in c that may be greater than 255. In
3737          16-bit mode, we always convert characters to UTF-8 so that values greater
3738          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3739          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3740          mode must have come from \x{...} or octal constructs because values from
3741          \x.. get this far only in non-UTF mode. */
3742    
3743    #if !defined NOUTF || defined SUPPORT_PCRE16
3744          if (use_pcre16 || use_utf)
3745            {
3746            pcre_uint8 buff8[8];
3747            int ii, utn;
3748            utn = ord2utf8(c, buff8);
3749            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3750            }
3751          else
3752    #endif
3753            {
3754            if (c > 255)
3755              {
3756              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3757                "and UTF-8 mode is not enabled.\n", c);
3758              fprintf(outfile, "** Truncation will probably give the wrong "
3759                "result.\n");
3760              }
3761            *q++ = c;
3762            }
3763        }        }
3764    
3765        /* Reached end of subject string */
3766    
3767      *q = 0;      *q = 0;
3768      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3769    
# Line 2600  while (!done) Line 3825  while (!done)
3825            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3826              {              {
3827              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3828              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3829                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3830              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3831              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3832                {                {
3833                fprintf(outfile, "%2d+ ", (int)i);                fprintf(outfile, "%2d+ ", (int)i);
3834                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3835                  outfile);                  outfile);
3836                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3837                }                }
# Line 2614  while (!done) Line 3839  while (!done)
3839            }            }
3840          }          }
3841        free(pmatch);        free(pmatch);
3842          goto NEXT_DATA;
3843        }        }
3844    
3845    #endif  /* !defined NOPOSIX */
3846    
3847      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3848    
3849      else  #ifdef SUPPORT_PCRE16
3850  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3851          {
3852          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3853          switch(len)
3854            {
3855            case -1:
3856            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3857              "converted to UTF-16\n");
3858            goto NEXT_DATA;
3859    
3860            case -2:
3861            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3862              "cannot be converted to UTF-16\n");
3863            goto NEXT_DATA;
3864    
3865            case -3:
3866            fprintf(outfile, "**Failed: character value greater than 0xffff "
3867              "cannot be converted to 16-bit in non-UTF mode\n");
3868            goto NEXT_DATA;
3869    
3870            default:
3871            break;
3872            }
3873          bptr = (pcre_uint8 *)buffer16;
3874          }
3875    #endif
3876    
3877      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3878        {        {
# Line 2636  while (!done) Line 3889  while (!done)
3889            {            {
3890            int workspace[1000];            int workspace[1000];
3891            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3892              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3893                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3894                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3895                  (sizeof(workspace)/sizeof(int)));
3896                }
3897            }            }
3898          else          else
3899  #endif  #endif
3900    
3901          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3902            count = pcre_exec(re, extra, (char *)bptr, len,            {
3903              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3904                (options | g_notempty), use_offsets, use_size_offsets);
3905              }
3906          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3907          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3908            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2655  while (!done) Line 3911  while (!done)
3911    
3912        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3913        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3914        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3915          running of pcre_exec(), so disable the JIT optimization. This makes it
3916          possible to run the same set of tests with and without JIT externally
3917          requested. */
3918    
3919        if (find_match_limit)        if (find_match_limit)
3920          {          {
# Line 2664  while (!done) Line 3923  while (!done)
3923            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3924            extra->flags = 0;            extra->flags = 0;
3925            }            }
3926            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3927    
3928          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3929            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2687  while (!done) Line 3947  while (!done)
3947            }            }
3948          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3949          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3950          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3951            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3952          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3953          }          }
# Line 2699  while (!done) Line 3959  while (!done)
3959        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3960          {          {
3961          int workspace[1000];          int workspace[1000];
3962          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3963            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3964            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3965          if (count == 0)          if (count == 0)
3966            {            {
3967            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2712  while (!done) Line 3972  while (!done)
3972    
3973        else        else
3974          {          {
3975          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3976            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3977          if (count == 0)          if (count == 0)
3978            {            {
3979            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
3980            count = use_size_offsets/3;            count = use_size_offsets/3;
3981            }            }
3982          }          }
3983    
3984          if (extra != NULL && (extra->flags & PCRE_EXTRA_USED_JIT) != 0)
3985            jit_was_used = TRUE;
3986    
3987        /* Matched */        /* Matched */
3988    
3989        if (count >= 0)        if (count >= 0)
3990          {          {
3991          int i, maxcount;          int i, maxcount;
3992            void *cnptr, *gnptr;
3993    
3994  #if !defined NODFA  #if !defined NODFA
3995          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2746  while (!done) Line 4010  while (!done)
4010              do_g = do_G = FALSE;        /* Break g/G loop */              do_g = do_G = FALSE;        /* Break g/G loop */
4011              }              }
4012            }            }
4013    
4014          /* do_allcaps requests showing of all captures in the pattern, to check          /* do_allcaps requests showing of all captures in the pattern, to check
4015          unset ones at the end. */          unset ones at the end. */
4016    
4017          if (do_allcaps)          if (do_allcaps)
4018            {            {
4019            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4020            count++;   /* Allow for full match */              goto SKIP_DATA;
4021            if (count * 2 > use_size_offsets) count = use_size_offsets/2;            count++;   /* Allow for full match */
4022            }            if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4023              }
4024    
4025          /* Output the captured substrings */          /* Output the captured substrings */
4026    
4027          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
4028            {            {
4029            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
4030              {              {
4031              if (use_offsets[i] != -1)              if (use_offsets[i] != -1)
4032                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4033                  use_offsets[i], i);                  use_offsets[i], i);
4034              if (use_offsets[i+1] != -1)              if (use_offsets[i+1] != -1)
4035                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4036                  use_offsets[i+1], i+1);                  use_offsets[i+1], i+1);
4037              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
4038              }              }
4039            else            else
4040              {              {
4041              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4042              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4043                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4044                if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4045              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4046              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
4047                {                {
4048                fprintf(outfile, "%2d+ ", i/2);                fprintf(outfile, "%2d+ ", i/2);
4049                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4050                  outfile);                  outfile);
4051                fprintf(outfile, "\n");                fprintf(outfile, "\n");
4052                }                }
4053              }              }
4054            }            }
4055    
4056          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
4057              {
4058              fprintf(outfile, "MK: ");
4059              PCHARSV(markptr, 0, -1, outfile);
4060              fprintf(outfile, "\n");
4061              }
4062    
4063          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4064            {            {
4065            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
4066              {              {
4067                int rc;
4068              char copybuffer[256];              char copybuffer[256];
4069              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4070                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
4071              if (rc < 0)              if (rc < 0)
4072                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4073              else              else
4074                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4075                  fprintf(outfile, "%2dC ", i);
4076                  PCHARSV(copybuffer, 0, rc, outfile);
4077                  fprintf(outfile, " (%d)\n", rc);
4078                  }
4079              }              }
4080            }            }
4081    
4082          for (copynamesptr = copynames;          cnptr = copynames;
4083               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4084            {            {
4085              int rc;
4086            char copybuffer[256];            char copybuffer[256];
4087            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4088              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4089                {
4090                if (*(pcre_uint16 *)cnptr == 0) break;
4091                }
4092              else
4093                {
4094                if (*(pcre_uint8 *)cnptr == 0) break;
4095                }
4096    
4097              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4098                cnptr, copybuffer, sizeof(copybuffer));
4099    
4100            if (rc < 0)            if (rc < 0)
4101              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4102                fprintf(outfile, "copy substring ");
4103                PCHARSV(cnptr, 0, -1, outfile);
4104                fprintf(outfile, " failed %d\n", rc);
4105                }
4106            else            else
4107              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4108                fprintf(outfile, "  C ");
4109                PCHARSV(copybuffer, 0, rc, outfile);
4110                fprintf(outfile, " (%d) ", rc);
4111                PCHARSV(cnptr, 0, -1, outfile);
4112                putc('\n', outfile);
4113                }
4114    
4115              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4116            }            }
4117    
4118          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4119            {            {
4120            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4121              {              {
4122                int rc;
4123              const char *substring;              const char *substring;
4124              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4125              if (rc < 0)              if (rc < 0)
4126                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4127              else              else
4128                {                {
4129                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4130                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4131                  fprintf(outfile, " (%d)\n", rc);
4132                  PCRE_FREE_SUBSTRING(substring);
4133                }                }
4134              }              }
4135            }            }
4136    
4137          for (getnamesptr = getnames;          gnptr = getnames;
4138               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4139            {            {
4140              int rc;
4141            const char *substring;            const char *substring;
4142            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4143              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4144                {
4145                if (*(pcre_uint16 *)gnptr == 0) break;
4146                }
4147              else
4148                {
4149                if (*(pcre_uint8 *)gnptr == 0) break;
4150                }
4151    
4152              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4153                gnptr, &substring);
4154            if (rc < 0)            if (rc < 0)
4155              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4156                fprintf(outfile, "get substring ");
4157                PCHARSV(gnptr, 0, -1, outfile);
4158                fprintf(outfile, " failed %d\n", rc);
4159                }
4160            else            else
4161              {              {
4162              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4163              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4164                fprintf(outfile, " (%d) ", rc);
4165                PCHARSV(gnptr, 0, -1, outfile);
4166                PCRE_FREE_SUBSTRING(substring);
4167                putc('\n', outfile);
4168              }              }
4169    
4170              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4171            }            }
4172    
4173          if (getlist)          if (getlist)
4174            {            {
4175              int rc;
4176            const char **stringlist;            const char **stringlist;
4177            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4178            if (rc < 0)            if (rc < 0)
4179              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4180            else            else
4181              {              {
4182              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4183                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4184                  fprintf(outfile, "%2dL ", i);
4185                  PCHARSV(stringlist[i], 0, -1, outfile);
4186                  putc('\n', outfile);
4187                  }
4188              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4189                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4190              /* free((void *)stringlist); */              PCRE_FREE_SUBSTRING_LIST(stringlist);
             pcre_free_substring_list(stringlist);  
4191              }              }
4192            }            }
4193          }          }
# Line 2873  while (!done) Line 4197  while (!done)
4197        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4198          {          {
4199          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4200            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4201              {
4202              fprintf(outfile, "Partial match, mark=");
4203              PCHARSV(markptr, 0, -1, outfile);
4204              }
4205          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4206            {            {
4207            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4208            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4209              outfile);              outfile);
4210            }            }
4211          fprintf(outfile, "\n");          fprintf(outfile, "\n");
# Line 2894  while (!done) Line 4222  while (!done)
4222        terminated by CRLF, an advance of one character just passes the \r,        terminated by CRLF, an advance of one character just passes the \r,
4223        whereas we should prefer the longer newline sequence, as does the code in        whereas we should prefer the longer newline sequence, as does the code in
4224        pcre_exec(). Fudge the offset value to achieve this. We check for a        pcre_exec(). Fudge the offset value to achieve this. We check for a
4225        newline setting in the pattern; if none was set, use pcre_config() to        newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4226        find the default.        find the default.
4227    
4228        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
# Line 2905  while (!done) Line 4233  while (!done)
4233          if (g_notempty != 0)          if (g_notempty != 0)
4234            {            {
4235            int onechar = 1;            int onechar = 1;
4236            unsigned int obits = ((real_pcre *)re)->options;            unsigned int obits = ((REAL_PCRE *)re)->options;
4237            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
4238            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4239              {              {
4240              int d;              int d;
4241              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4242              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4243              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4244              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 2924  while (!done) Line 4252  while (!done)
4252                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4253                &&                &&
4254                start_offset < len - 1 &&                start_offset < len - 1 &&
4255                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4256                bptr[start_offset+1] == '\n')                (use_pcre16?
4257                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4258                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4259                  :
4260                       bptr[start_offset] == '\r'
4261                    && bptr[start_offset + 1] == '\n')
4262    #elif defined SUPPORT_PCRE16
4263                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4264                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4265    #else
4266                     bptr[start_offset] == '\r'
4267                  && bptr[start_offset + 1] == '\n'
4268    #endif
4269                  )
4270              onechar++;              onechar++;
4271            else if (use_utf8)            else if (use_utf)
4272              {              {
4273              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4274                {                {
# Line 2940  while (!done) Line 4281  while (!done)
4281          else          else
4282            {            {
4283            switch(count)            switch(count)
4284              {              {
4285              case PCRE_ERROR_NOMATCH:              case PCRE_ERROR_NOMATCH:
4286              if (gmatched == 0)              if (gmatched == 0)
4287                {                {
4288                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL)
4289                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  {
4290                    fprintf(outfile, "No match");
4291                    }
4292                  else
4293                    {
4294                    fprintf(outfile, "No match, mark = ");
4295                    PCHARSV(markptr, 0, -1, outfile);
4296                    }
4297                  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4298                  putc('\n', outfile);
4299                }                }
4300              break;              break;
4301    
4302              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
4303              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
4304              fprintf(outfile, "Error %d (%s UTF-8 string)", count,              fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4305                (count == PCRE_ERROR_BADUTF8)? "bad" : "short");                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4306                  use_pcre16? "16" : "8");
4307              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
4308                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4309                  use_offsets[1]);                  use_offsets[1]);
4310              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4311              break;              break;
4312    
4313                case PCRE_ERROR_BADUTF8_OFFSET:
4314                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4315                  use_pcre16? "16" : "8");
4316                break;
4317    
4318              default:              default:
4319              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 &&
4320                    (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4321                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4322              else              else
4323                fprintf(outfile, "Error %d (Unexpected value)\n", count);                fprintf(outfile, "Error %d (Unexpected value)\n", count);
4324              break;              break;
4325              }              }
4326    
4327            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
4328            }            }
4329          }          }
# Line 2998  while (!done) Line 4355  while (!done)
4355    
4356        else        else
4357          {          {
4358          bptr += use_offsets[1];          bptr += use_offsets[1] * CHAR_SIZE;
4359          len -= use_offsets[1];          len -= use_offsets[1];
4360          }          }
4361        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
# Line 3013  while (!done) Line 4370  while (!done)
4370  #endif  #endif
4371    
4372    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
4373    if (extra != NULL) new_free(extra);    if (extra != NULL)
4374        {
4375        PCRE_FREE_STUDY(extra);
4376        }
4377    if (locale_set)    if (locale_set)
4378      {      {
4379      new_free((void *)tables);      new_free((void *)tables);
4380      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
4381      locale_set = 0;      locale_set = 0;
4382      }      }
4383      if (jit_stack != NULL)
4384        {
4385        PCRE_JIT_STACK_FREE(jit_stack);
4386        jit_stack = NULL;
4387        }
4388    }    }
4389    
4390  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 3034  free(dbuffer); Line 4399  free(dbuffer);
4399  free(pbuffer);  free(pbuffer);
4400  free(offsets);  free(offsets);
4401    
4402    #ifdef SUPPORT_PCRE16
4403    if (buffer16 != NULL) free(buffer16);
4404    #endif
4405    
4406  return yield;  return yield;
4407  }  }
4408    

Legend:
Removed from v.645  
changed lines
  Added in v.922

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12