/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 580 by ph10, Fri Nov 26 11:16:43 2010 UTC revision 904 by ph10, Mon Jan 23 17:30:49 2012 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 105  here before pcre_internal.h so that the Line 116  here before pcre_internal.h so that the
116  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
117    
118  #include "pcre.h"  #include "pcre.h"
119    
120    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121    /* Configure internal macros to 16 bit mode. */
122    #define COMPILE_PCRE16
123    #endif
124    
125  #include "pcre_internal.h"  #include "pcre_internal.h"
126    
127    /* The pcre_printint() function, which prints the internal form of a compiled
128    regex, is held in a separate file so that (a) it can be compiled in either
129    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130    when that is compiled in debug mode. */
131    
132    #ifdef SUPPORT_PCRE8
133    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134    #endif
135    #ifdef SUPPORT_PCRE16
136    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137    #endif
138    
139  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
140  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
141  external symbols to prevent clashes. */  external symbols to prevent clashes. */
142    
143  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
144  #define _pcre_utf8_table1      utf8_table1  #undef PRIV
145  #define _pcre_utf8_table1_size utf8_table1_size  #define PRIV(name) name
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
146    
147  #include "pcre_tables.c"  #include "pcre_tables.c"
148    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
149  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
150  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
151  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
152  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
153  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
154    
155    #ifdef EBCDIC
156    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157    #else
158    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159    #endif
160    
161  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163    /* Posix support is disabled in 16 bit only mode. */
164    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165    #define NOPOSIX
166    #endif
167    
168  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
169  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 148  Makefile. */ Line 173  Makefile. */
173  #include "pcreposix.h"  #include "pcreposix.h"
174  #endif  #endif
175    
176  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
177  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), without the interface to the DFA matcher (NODFA). In fact, we
179  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
180  UTF8 support if PCRE is built without it. */  
181    #ifndef SUPPORT_UTF
182  #ifndef SUPPORT_UTF8  #ifndef NOUTF
183  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
184  #endif  #endif
185  #endif  #endif
186    
187    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189    only from one place and is handled differently). I couldn't dream up any way of
190    using a single macro to do this in a generic way, because of the many different
191    argument requirements. We know that at least one of SUPPORT_PCRE8 and
192    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193    use these in the definitions of generic macros.
194    
195    **** Special note about the PCHARSxxx macros: the address of the string to be
196    printed is always given as two arguments: a base address followed by an offset.
197    The base address is cast to the correct data size for 8 or 16 bit data; the
198    offset is in units of this size. If the string were given as base+offset in one
199    argument, the casting might be incorrectly applied. */
200    
201    #ifdef SUPPORT_PCRE8
202    
203    #define PCHARS8(lv, p, offset, len, f) \
204      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206    #define PCHARSV8(p, offset, len, f) \
207      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210      p = read_capture_name8(p, cn8, re)
211    
212    #define STRLEN8(p) ((int)strlen((char *)p))
213    
214    #define SET_PCRE_CALLOUT8(callout) \
215      pcre_callout = callout
216    
217    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
218       pcre_assign_jit_stack(extra, callback, userdata)
219    
220    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
221      re = pcre_compile((char *)pat, options, error, erroffset, tables)
222    
223    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
224        namesptr, cbuffer, size) \
225      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
226        (char *)namesptr, cbuffer, size)
227    
228    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
229      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
230    
231    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
232        offsets, size_offsets, workspace, size_workspace) \
233      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
234        offsets, size_offsets, workspace, size_workspace)
235    
236    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
237        offsets, size_offsets) \
238      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
239        offsets, size_offsets)
240    
241    #define PCRE_FREE_STUDY8(extra) \
242      pcre_free_study(extra)
243    
244    #define PCRE_FREE_SUBSTRING8(substring) \
245      pcre_free_substring(substring)
246    
247    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
248      pcre_free_substring_list(listptr)
249    
250    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
251        getnamesptr, subsptr) \
252      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
253        (char *)getnamesptr, subsptr)
254    
255    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
256      n = pcre_get_stringnumber(re, (char *)ptr)  /* NOTE(review): 're' is taken from the expansion site, not from a macro parameter; the 'rc' parameter is unused */
257    
258    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
259      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
260    
261    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
262      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
263    
264    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
265      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
266    
267    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
268      pcre_printint(re, outfile, debug_lengths)
269    
270    #define PCRE_STUDY8(extra, re, options, error) \
271      extra = pcre_study(re, options, error)
272    
273    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
274      pcre_jit_stack_alloc(startsize, maxsize)
275    
276    #define PCRE_JIT_STACK_FREE8(stack) \
277      pcre_jit_stack_free(stack)
278    
279    #endif /* SUPPORT_PCRE8 */
280    
281    /* -----------------------------------------------------------*/
282    
283    #ifdef SUPPORT_PCRE16
284    
285    #define PCHARS16(lv, p, offset, len, f) \
286      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
287    
288    #define PCHARSV16(p, offset, len, f) \
289      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
290    
291    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
292      p = read_capture_name16(p, cn16, re)
293    
294    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
295    
296    #define SET_PCRE_CALLOUT16(callout) \
297      pcre16_callout = (int (*)(pcre16_callout_block *))callout
298    
299    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
300      pcre16_assign_jit_stack((pcre16_extra *)extra, \
301        (pcre16_jit_callback)callback, userdata)
302    
303    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
304      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
305        tables)
306    
307    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
308        namesptr, cbuffer, size) \
309      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
310        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
311    
312    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
313      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
314        (PCRE_UCHAR16 *)cbuffer, size/2)
315    
316    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
317        offsets, size_offsets, workspace, size_workspace) \
318      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
319        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
320        workspace, size_workspace)
321    
322    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
323        offsets, size_offsets) \
324      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
325        len, start_offset, options, offsets, size_offsets)
326    
327    #define PCRE_FREE_STUDY16(extra) \
328      pcre16_free_study((pcre16_extra *)extra)
329    
330    #define PCRE_FREE_SUBSTRING16(substring) \
331      pcre16_free_substring((PCRE_SPTR16)substring)
332    
333    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
334      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
335    
336    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
337        getnamesptr, subsptr) \
338      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
339        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
340    
341    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
342      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)  /* NOTE(review): 're' is taken from the expansion site, not from a macro parameter; the 'rc' parameter is unused */
343    
344    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
345      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
346        (PCRE_SPTR16 *)(void*)subsptr)
347    
348    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
349      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
350        (PCRE_SPTR16 **)(void*)listptr)
351    
352    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
353      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
354        tables)
355    
356    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
357      pcre16_printint(re, outfile, debug_lengths)
358    
359    #define PCRE_STUDY16(extra, re, options, error) \
360      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
361    
362    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
363      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
364    
365    #define PCRE_JIT_STACK_FREE16(stack) \
366      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
367    
368    #endif /* SUPPORT_PCRE16 */
369    
370    
371    /* ----- Both modes are supported; a runtime test is needed, except for
372    pcre_config(), and the JIT stack functions, when it doesn't matter which
373    version is called. ----- */
374    
375    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
376    
377    #define CHAR_SIZE (use_pcre16? 2:1)
378    
379    #define PCHARS(lv, p, offset, len, f) \
380      if (use_pcre16) \
381        PCHARS16(lv, p, offset, len, f); \
382      else \
383        PCHARS8(lv, p, offset, len, f)
384    
385    #define PCHARSV(p, offset, len, f) \
386      if (use_pcre16) \
387        PCHARSV16(p, offset, len, f); \
388      else \
389        PCHARSV8(p, offset, len, f)
390    
391    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
392      if (use_pcre16) \
393        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
394      else \
395        READ_CAPTURE_NAME8(p, cn8, cn16, re)
396    
397    #define SET_PCRE_CALLOUT(callout) \
398      if (use_pcre16) \
399        SET_PCRE_CALLOUT16(callout); \
400      else \
401        SET_PCRE_CALLOUT8(callout)
402    
403    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
404    
405    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
406      if (use_pcre16) \
407        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
408      else \
409        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
410    
411    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
412      if (use_pcre16) \
413        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
414      else \
415        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
416    
417    #define PCRE_CONFIG pcre_config
418    
419    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
420        namesptr, cbuffer, size) \
421      if (use_pcre16) \
422        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
423          namesptr, cbuffer, size); \
424      else \
425        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
426          namesptr, cbuffer, size)
427    
428    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
429      if (use_pcre16) \
430        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
431      else \
432        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
433    
434    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
435        offsets, size_offsets, workspace, size_workspace) \
436      if (use_pcre16) \
437        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
438          offsets, size_offsets, workspace, size_workspace); \
439      else \
440        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
441          offsets, size_offsets, workspace, size_workspace)
442    
443    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
444        offsets, size_offsets) \
445      if (use_pcre16) \
446        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
447          offsets, size_offsets); \
448      else \
449        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
450          offsets, size_offsets)
451    
452    #define PCRE_FREE_STUDY(extra) \
453      if (use_pcre16) \
454        PCRE_FREE_STUDY16(extra); \
455      else \
456        PCRE_FREE_STUDY8(extra)
457    
458    #define PCRE_FREE_SUBSTRING(substring) \
459      if (use_pcre16) \
460        PCRE_FREE_SUBSTRING16(substring); \
461      else \
462        PCRE_FREE_SUBSTRING8(substring)
463    
464    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
465      if (use_pcre16) \
466        PCRE_FREE_SUBSTRING_LIST16(listptr); \
467      else \
468        PCRE_FREE_SUBSTRING_LIST8(listptr)
469    
470    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
471        getnamesptr, subsptr) \
472      if (use_pcre16) \
473        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
474          getnamesptr, subsptr); \
475      else \
476        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
477          getnamesptr, subsptr)
478    
479    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
480      if (use_pcre16) \
481        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
482      else \
483        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
484    
485    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
486      if (use_pcre16) \
487        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
488      else \
489        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
490    
491    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
492      if (use_pcre16) \
493        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
494      else \
495        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
496    
497    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
498      (use_pcre16 ? \
499         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
500        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
501    
502    #define PCRE_JIT_STACK_FREE(stack) \
503      if (use_pcre16) \
504        PCRE_JIT_STACK_FREE16(stack); \
505      else \
506        PCRE_JIT_STACK_FREE8(stack)
507    
508    #define PCRE_MAKETABLES \
509      (use_pcre16? pcre16_maketables() : pcre_maketables())
510    
511    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
512      if (use_pcre16) \
513        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
514      else \
515        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
516    
517    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
518      if (use_pcre16) \
519        PCRE_PRINTINT16(re, outfile, debug_lengths); \
520      else \
521        PCRE_PRINTINT8(re, outfile, debug_lengths)
522    
523    #define PCRE_STUDY(extra, re, options, error) \
524      if (use_pcre16) \
525        PCRE_STUDY16(extra, re, options, error); \
526      else \
527        PCRE_STUDY8(extra, re, options, error)
528    
529    /* ----- Only 8-bit mode is supported ----- */
530    
531    #elif defined SUPPORT_PCRE8
532    #define CHAR_SIZE                 1
533    #define PCHARS                    PCHARS8
534    #define PCHARSV                   PCHARSV8
535    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
536    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
537    #define STRLEN                    STRLEN8
538    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
539    #define PCRE_COMPILE              PCRE_COMPILE8
540    #define PCRE_CONFIG               pcre_config
541    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
542    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
543    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
544    #define PCRE_EXEC                 PCRE_EXEC8
545    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
546    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
547    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
548    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
549    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
550    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
551    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
552    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
553    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
554    #define PCRE_MAKETABLES           pcre_maketables()
555    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
556    #define PCRE_PRINTINT             PCRE_PRINTINT8
557    #define PCRE_STUDY                PCRE_STUDY8
558    
559    /* ----- Only 16-bit mode is supported ----- */
560    
561    #else
562    #define CHAR_SIZE                 2
563    #define PCHARS                    PCHARS16
564    #define PCHARSV                   PCHARSV16
565    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
566    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
567    #define STRLEN                    STRLEN16
568    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
569    #define PCRE_COMPILE              PCRE_COMPILE16
570    #define PCRE_CONFIG               pcre16_config
571    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
572    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
573    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
574    #define PCRE_EXEC                 PCRE_EXEC16
575    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
576    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
577    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
578    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
579    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
580    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
581    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
582    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
583    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
584    #define PCRE_MAKETABLES           pcre16_maketables()
585    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
586    #define PCRE_PRINTINT             PCRE_PRINTINT16
587    #define PCRE_STUDY                PCRE_STUDY16
588    #endif
589    
590    /* ----- End of mode-specific function call macros ----- */
591    
592    
593  /* Other parameters */  /* Other parameters */
594    
# Line 187  static int debug_lengths; Line 616  static int debug_lengths;
616  static int first_callout;  static int first_callout;
617  static int locale_set = 0;  static int locale_set = 0;
618  static int show_malloc;  static int show_malloc;
619  static int use_utf8;  static int use_utf;
620  static size_t gotten_store;  static size_t gotten_store;
621    static size_t first_gotten_store = 0;
622    static const unsigned char *last_callout_mark = NULL;
623    
624  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
625    
626  static int buffer_size = 50000;  static int buffer_size = 50000;
627  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
628  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
629  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
630    
631    /* Another buffer is needed for translation to 16-bit character strings. It
632    will be obtained and extended as required. */
633    
634    #ifdef SUPPORT_PCRE16
635    static int buffer16_size = 0;
636    static pcre_uint16 *buffer16 = NULL;
637    
638    #ifdef SUPPORT_PCRE8
639    
640    /* We need the table of operator lengths that is used for 16-bit compiling, in
641    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
642    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
643    appropriately for the 16-bit world. Just as a safety check, make sure that
644    COMPILE_PCRE16 is *not* set. */
645    
646    #ifdef COMPILE_PCRE16
647    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
648    #endif
649    
650    #if LINK_SIZE == 2
651    #undef LINK_SIZE
652    #define LINK_SIZE 1
653    #elif LINK_SIZE == 3 || LINK_SIZE == 4
654    #undef LINK_SIZE
655    #define LINK_SIZE 2
656    #else
657    #error LINK_SIZE must be either 2, 3, or 4
658    #endif
659    
660    #undef IMM2_SIZE
661    #define IMM2_SIZE 1
662    
663    #endif /* SUPPORT_PCRE8 */
664    
665    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
666    #endif  /* SUPPORT_PCRE16 */
667    
668    /* If we have 8-bit support, default use_pcre16 to false; if there is also
669    16-bit support, it can be changed by an option. If there is no 8-bit support,
670    there must be 16-bit support, so default it to 1. */
671    
672    #ifdef SUPPORT_PCRE8
673    static int use_pcre16 = 0;
674    #else
675    static int use_pcre16 = 1;
676    #endif
677    
678    /* Textual explanations for runtime error codes */
679    
680    static const char *errtexts[] = {
681      NULL,  /* 0 is no error */
682      NULL,  /* NOMATCH is handled specially */
683      "NULL argument passed",
684      "bad option value",
685      "magic number missing",
686      "unknown opcode - pattern overwritten?",
687      "no more memory",
688      NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
689      "match limit exceeded",
690      "callout error code",
691      NULL,  /* BADUTF8/16 is handled specially */
692      NULL,  /* BADUTF8/16 offset is handled specially */
693      NULL,  /* PARTIAL is handled specially */
694      "not used - internal error",
695      "internal error - pattern overwritten?",
696      "bad count value",
697      "item unsupported for DFA matching",
698      "backreference condition or recursion test not supported for DFA matching",
699      "match limit not supported for DFA matching",
700      "workspace size exceeded in DFA matching",
701      "too much recursion for DFA matching",
702      "recursion limit exceeded",
703      "not used - internal error",
704      "invalid combination of newline options",
705      "bad offset value",
706      NULL,  /* SHORTUTF8/16 is handled specially */
707      "nested recursion at the same subject position",
708      "JIT stack limit reached",
709      "pattern compiled in wrong mode: 8-bit/16-bit error"
710    };
711    
712    
713  /*************************************************  /*************************************************
# Line 210  the L (locale) option also adjusts the t Line 722  the L (locale) option also adjusts the t
722  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
723  only ASCII characters. */  only ASCII characters. */
724    
725  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
726    
727  /* This table is a lower casing table. */  /* This table is a lower casing table. */
728    
# Line 383  graph, print, punct, and cntrl. Other cl Line 895  graph, print, punct, and cntrl. Other cl
895  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
896  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
897    
898  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
899  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
900  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
901  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 546  return sys_errlist[n]; Line 1058  return sys_errlist[n];
1058  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1059    
1060    
1061    /*************************************************
1062    *         JIT memory callback                    *
1063    *************************************************/
1064    
1065    static pcre_jit_stack* jit_callback(void *arg)   /* arg: opaque pointer supplied by the caller */
1066    {
1067    return (pcre_jit_stack *)arg;                    /* hand the same pointer back, viewed as a JIT stack */
1068    }
1069    
1070    
1071    #if !defined NOUTF || defined SUPPORT_PCRE16
1072    /*************************************************
1073    *            Convert UTF-8 string to value       *
1074    *************************************************/
1075    
1076    /* This function takes one or more bytes that represent a UTF-8 character,
1077    and returns the value of the character.
1078    
1079    Arguments:
1080      utf8bytes   a pointer to the byte vector
1081      vptr        a pointer to an int to receive the value (written only on success)
1082    
1083    Returns:      >  0 => the number of bytes consumed
1084                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1085    */
1086    
1087    static int
1088    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1089    {
1090    int c = *utf8bytes++;                  /* first (lead) byte */
1091    int d = c;
1092    int i, j, s;
1093    
1094    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1095      {
1096      if ((d & 0x80) == 0) break;          /* stop at the first 0 bit, counting the leading 1 bits */
1097      d <<= 1;
1098      }
1099    
1100    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1101    if (i == 0 || i == 6) return 0;        /* invalid UTF-8: first byte has one, or more than six, leading 1 bits */
1102    
1103    /* i now has a value in the range 1-5 */
1104    
1105    s = 6*i;                               /* each additional byte contributes 6 bits below the lead byte's bits */
1106    d = (c & utf8_table3[i]) << s;         /* presumably utf8_table3[i] masks the data bits of the lead byte - defined in pcre_tables.c */
1107    
1108    for (j = 0; j < i; j++)
1109      {
1110      c = *utf8bytes++;
1111      if ((c & 0xc0) != 0x80) return -(j+1);   /* top two bits are not 10: report this byte's offset, negated */
1112      s -= 6;
1113      d |= (c & 0x3f) << s;                /* merge the low 6 bits of this byte */
1114      }
1115    
1116    /* Check that encoding was the correct unique one: j becomes the first index
        whose utf8_table1 entry is not less than the value, and it must equal i */
1117    
1118    for (j = 0; j < utf8_table1_size; j++)
1119      if (d <= utf8_table1[j]) break;
1120    if (j != i) return -(i+1);             /* value would fit a different length: reject */
1121    
1122    /* Valid value */
1123    
1124    *vptr = d;
1125    return i+1;                            /* lead byte plus i additional bytes */
1126    }
1127    #endif /* NOUTF || SUPPORT_PCRE16 */
1128    
1129    
1130    
1131    #if !defined NOUTF || defined SUPPORT_PCRE16
1132    /*************************************************
1133    *       Convert character value to UTF-8         *
1134    *************************************************/
1135    
1136    /* This function takes an integer value in the range 0 - 0x7fffffff
1137    and encodes it as a UTF-8 character in 1 to 6 bytes.
1138    
1139    Arguments:
1140      cvalue     the character value
1141      utf8bytes  pointer to buffer for result - at least 6 bytes long
1142    
1143    Returns:     number of bytes placed in the buffer
1144    */
1145    
1146    static int
1147    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1148    {
1149    register int i, j;
1150    for (i = 0; i < utf8_table1_size; i++)
1151      if (cvalue <= utf8_table1[i]) break;
1152    utf8bytes += i;
1153    for (j = i; j > 0; j--)
1154     {
1155     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1156     cvalue >>= 6;
1157     }
1158    *utf8bytes = utf8_table2[i] | cvalue;
1159    return i + 1;
1160    }
1161    #endif
1162    
1163    
1164    #ifdef SUPPORT_PCRE16
1165    /*************************************************
1166    *         Convert a string to 16-bit             *
1167    *************************************************/
1168    
1169    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1170    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1171    double, because a value up to 0xffff uses 1 to 3 bytes in UTF-8 and exactly 2
1172    bytes in UTF-16. Higher values use 4 bytes in UTF-8 and 4 bytes (a surrogate
1173    pair) in UTF-16. The result is always left in buffer16.
1174    
1175    Note that this function does not object to surrogate values. This is
1176    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1177    for the purpose of testing that they are correctly faulted.
1178    
1179    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1180    in UTF-8 so that values greater than 255 can be handled.
1181    
1182    Arguments:
1183      data       TRUE if converting a data line; FALSE for a regex
1184      p          points to a byte string
1185      utf        true if UTF-8 (to be converted to UTF-16)
1186      len        number of bytes in the string (excluding trailing zero)
1187    
1188    Returns:     number of 16-bit data items used (excluding trailing zero)
1189                 OR -1 if a UTF-8 string is malformed
1190                 OR -2 if a value > 0x10ffff is encountered
1191                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1192    */
1193    
1194    static int
1195    to16(int data, pcre_uint8 *p, int utf, int len)
1196    {
1197    pcre_uint16 *pp;
1198    
1199    if (buffer16_size < 2*len + 2)
1200      {
1201      if (buffer16 != NULL) free(buffer16);
1202      buffer16_size = 2*len + 2;
1203      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1204      if (buffer16 == NULL)
1205        {
1206        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1207        exit(1);
1208        }
1209      }
1210    
1211    pp = buffer16;
1212    
1213    if (!utf && !data)
1214      {
1215      while (len-- > 0) *pp++ = *p++;
1216      }
1217    
1218    else
1219      {
1220      int c = 0;
1221      while (len > 0)
1222        {
1223        int chlen = utf82ord(p, &c);
1224        if (chlen <= 0) return -1;
1225        if (c > 0x10ffff) return -2;
1226        p += chlen;
1227        len -= chlen;
1228        if (c < 0x10000) *pp++ = c; else
1229          {
1230          if (!utf) return -3;
1231          c -= 0x10000;
1232          *pp++ = 0xD800 | (c >> 10);
1233          *pp++ = 0xDC00 | (c & 0x3ff);
1234          }
1235        }
1236      }
1237    
1238    *pp = 0;
1239    return pp - buffer16;
1240    }
1241    #endif
1242    
1243    
1244  /*************************************************  /*************************************************
# Line 571  Returns: pointer to the start of n Line 1264  Returns: pointer to the start of n
1264                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1265  */  */
1266    
1267  static uschar *  static pcre_uint8 *
1268  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1269  {  {
1270  uschar *here = start;  pcre_uint8 *here = start;
1271    
1272  for (;;)  for (;;)
1273    {    {
1274    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1275    
1276    if (rlen > 1000)    if (rlen > 1000)
1277      {      {
# Line 621  for (;;) Line 1314  for (;;)
1314    else    else
1315      {      {
1316      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1317      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1318      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1319      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1320    
1321      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1322        {        {
# Line 654  return NULL; /* Control never gets here Line 1347  return NULL; /* Control never gets here
1347    
1348    
1349    
   
   
   
   
1350  /*************************************************  /*************************************************
1351  *          Read number from string               *  *          Read number from string               *
1352  *************************************************/  *************************************************/
# Line 674  Returns: the unsigned long Line 1363  Returns: the unsigned long
1363  */  */
1364    
1365  static int  static int
1366  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1367  {  {
1368  int result = 0;  int result = 0;
1369  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 685  return(result); Line 1374  return(result);
1374    
1375    
1376    
   
1377  /*************************************************  /*************************************************
1378  *            Convert UTF-8 string to value       *  *             Print one character                *
1379  *************************************************/  *************************************************/
1380    
1381  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
1382    
1383  Returns:      >  0 => the number of bytes consumed  static int pchar(int c, FILE *f)
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1384  {  {
1385  int c = *utf8bytes++;  if (PRINTOK(c))
1386  int d = c;    {
1387  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1388      return 1;
1389      }
1390    
1391  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1392    {    {
1393    if ((d & 0x80) == 0) break;    if (use_utf)
1394    d <<= 1;      {
1395        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1396        return 6;
1397        }
1398      else
1399        {
1400        if (f != NULL) fprintf(f, "\\x%02x", c);
1401        return 4;
1402        }
1403    }    }
1404    
1405  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1406  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1407           (c <= 0x00000fff)? 7 :
1408           (c <= 0x0000ffff)? 8 :
1409           (c <= 0x000fffff)? 9 : 10;
1410    }
1411    
 /* i now has a value in the range 1-5 */  
1412    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1413    
1414  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1415    {  /*************************************************
1416    c = *utf8bytes++;  *         Print 8-bit character string           *
1417    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1418    
1419  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1420    If handed a NULL file, just counts chars without printing. */
1421    
1422  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1423    if (d <= utf8_table1[j]) break;  {
1424  if (j != i) return -(i+1);  int c = 0;
1425    int yield = 0;
1426    
1427  /* Valid value */  if (length < 0)
1428      length = strlen((char *)p);
1429    
1430  *vptr = d;  while (length-- > 0)
1431  return i+1;    {
1432  }  #if !defined NOUTF
1433      if (use_utf)
1434        {
1435        int rc = utf82ord(p, &c);
1436        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1437          {
1438          length -= rc - 1;
1439          p += rc;
1440          yield += pchar(c, f);
1441          continue;
1442          }
1443        }
1444    #endif
1445      c = *p++;
1446      yield += pchar(c, f);
1447      }
1448    
1449    return yield;
1450    }
1451  #endif  #endif
1452    
1453    
1454    
1455    #ifdef SUPPORT_PCRE16
1456  /*************************************************  /*************************************************
1457  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1458  *************************************************/  *************************************************/
1459    
1460  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1461  {  {
1462  register int i, j;  int len = 0;
1463  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1464    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1465  }  }
1466    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1467    
1468    
1469    #ifdef SUPPORT_PCRE16
1470  /*************************************************  /*************************************************
1471  *             Print character string             *  *           Print 16-bit character string        *
1472  *************************************************/  *************************************************/
1473    
1474  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1475  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1476    
1477  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1478  {  {
 int c = 0;  
1479  int yield = 0;  int yield = 0;
1480    
1481    if (length < 0)
1482      length = strlen16(p);
1483    
1484  while (length-- > 0)  while (length-- > 0)
1485    {    {
1486  #if !defined NOUTF8    int c = *p++ & 0xffff;
1487    if (use_utf8)  #if !defined NOUTF
1488      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1489      {      {
1490      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1491        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1492        {        {
1493        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1494        p += rc;        length--;
1495        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1496        }        }
1497      }      }
1498  #endif  #endif
1499      yield += pchar(c, f);
1500      }
1501    
1502     /* Not UTF-8, or malformed UTF-8  */  return yield;
1503    }
1504    #endif  /* SUPPORT_PCRE16 */
1505    
1506    c = *p++;  
1507    if (PRINTHEX(c))  
1508      {  #ifdef SUPPORT_PCRE8
1509      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1510      yield++;  *     Read a capture name (8-bit) and check it   *
1511      }  *************************************************/
1512    else  
1513      {  static pcre_uint8 *
1514      if (f != NULL) fprintf(f, "\\x%02x", c);  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1515      yield += 4;  {
1516      }  pcre_uint8 *npp = *pp;
1517    while (isalnum(*p)) *npp++ = *p++;
1518    *npp++ = 0;
1519    *npp = 0;
1520    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1521      {
1522      fprintf(outfile, "no parentheses with name \"");
1523      PCHARSV(*pp, 0, -1, outfile);
1524      fprintf(outfile, "\"\n");
1525    }    }
1526    
1527  return yield;  *pp = npp;
1528    return p;
1529  }  }
1530    #endif  /* SUPPORT_PCRE8 */
1531    
1532    
1533    
1534    #ifdef SUPPORT_PCRE16
1535    /*************************************************
1536    *     Read a capture name (16-bit) and check it  *
1537    *************************************************/
1538    
1539    /* Note that the text being read is 8-bit. */
1540    
1541    static pcre_uint8 *
1542    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1543    {
1544    pcre_uint16 *npp = *pp;
1545    while (isalnum(*p)) *npp++ = *p++;
1546    *npp++ = 0;
1547    *npp = 0;
1548    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1549      {
1550      fprintf(outfile, "no parentheses with name \"");
1551      PCHARSV(*pp, 0, -1, outfile);
1552      fprintf(outfile, "\"\n");
1553      }
1554    *pp = npp;
1555    return p;
1556    }
1557    #endif  /* SUPPORT_PCRE16 */
1558    
1559    
1560    
# Line 872  if (callout_extra) Line 1583  if (callout_extra)
1583      else      else
1584        {        {
1585        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1586        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1587          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1588        fprintf(f, "\n");        fprintf(f, "\n");
1589        }        }
# Line 885  printed lengths of the substrings. */ Line 1596  printed lengths of the substrings. */
1596    
1597  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1598    
1599  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1600  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1601    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1602    
1603  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1604    
1605  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1606    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1607    
1608  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 928  fprintf(outfile, "%.*s", (cb->next_item_ Line 1639  fprintf(outfile, "%.*s", (cb->next_item_
1639  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1640  first_callout = 0;  first_callout = 0;
1641    
1642    if (cb->mark != last_callout_mark)
1643      {
1644      if (cb->mark == NULL)
1645        fprintf(outfile, "Latest Mark: <unset>\n");
1646      else
1647        {
1648        fprintf(outfile, "Latest Mark: ");
1649        PCHARSV(cb->mark, 0, -1, outfile);
1650        putc('\n', outfile);
1651        }
1652      last_callout_mark = cb->mark;
1653      }
1654    
1655  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1656    {    {
1657    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 947  return (cb->callout_number != callout_fa Line 1671  return (cb->callout_number != callout_fa
1671  *            Local malloc functions              *  *            Local malloc functions              *
1672  *************************************************/  *************************************************/
1673    
1674  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1675  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1676    show_malloc variable is set only during matching. */
1677    
1678  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1679  {  {
1680  void *block = malloc(size);  void *block = malloc(size);
1681  gotten_store = size;  gotten_store = size;
1682    if (first_gotten_store == 0) first_gotten_store = size;
1683  if (show_malloc)  if (show_malloc)
1684    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1685  return block;  return block;
# Line 966  if (show_malloc) Line 1692  if (show_malloc)
1692  free(block);  free(block);
1693  }  }
1694    
   
1695  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1696    
1697  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 989  free(block); Line 1714  free(block);
1714  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1715  *************************************************/  *************************************************/
1716    
1717  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1718    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1719    value, but the code is defensive.
1720    
1721    Arguments:
1722      re        compiled regex
1723      study     study data
1724      option    PCRE_INFO_xxx option
1725      ptr       where to put the data
1726    
1727  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  Returns:    0 when OK, < 0 on error
1728    */
1729    
1730    static int
1731    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1732  {  {
1733  int rc;  int rc;
1734  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1735    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1736    #ifdef SUPPORT_PCRE16
1737      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1738    #else
1739      rc = PCRE_ERROR_BADMODE;
1740    #endif
1741    else
1742    #ifdef SUPPORT_PCRE8
1743      rc = pcre_fullinfo(re, study, option, ptr);
1744    #else
1745      rc = PCRE_ERROR_BADMODE;
1746    #endif
1747    
1748    if (rc < 0)
1749      {
1750      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1751        use_pcre16? "16" : "", option);
1752      if (rc == PCRE_ERROR_BADMODE)
1753        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1754          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1755      }
1756    
1757    return rc;
1758  }  }
1759    
1760    
1761    
1762  /*************************************************  /*************************************************
1763  *         Byte flipping function                 *  *             Swap byte functions                *
1764  *************************************************/  *************************************************/
1765    
1766  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1767  byteflip(unsigned long int value, int n)  value, respectively.
1768    
1769    Arguments:
1770      value        any number
1771    
1772    Returns:       the byte swapped value
1773    */
1774    
1775    static pcre_uint32
1776    swap_uint32(pcre_uint32 value)
1777  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1778  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1779         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1780         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1781         ((value & 0xff000000) >> 24);         (value >> 24);
1782  }  }
1783    
1784    static pcre_uint16
1785    swap_uint16(pcre_uint16 value)
1786    {
1787    return (value >> 8) | (value << 8);
1788    }
1789    
1790    
1791    
1792    /*************************************************
1793    *        Flip bytes in a compiled pattern        *
1794    *************************************************/
1795    
1796    /* This function is called if the 'F' option was present on a pattern that is
1797    to be written to a file. We flip the bytes of all the integer fields in the
1798    regex data block and the study block. In 16-bit mode this also flips relevant
1799    bytes in the pattern itself. This is to make it possible to test PCRE's
1800    ability to reload byte-flipped patterns, e.g. those compiled on a different
1801    architecture. */
1802    
1803    static void
1804    regexflip(pcre *ere, pcre_extra *extra)
1805    {
1806    REAL_PCRE *re = (REAL_PCRE *)ere;
1807    #ifdef SUPPORT_PCRE16
1808    int op;
1809    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1810    int length = re->name_count * re->name_entry_size;
1811    #ifdef SUPPORT_UTF
1812    BOOL utf = (re->options & PCRE_UTF16) != 0;
1813    BOOL utf16_char = FALSE;
1814    #endif /* SUPPORT_UTF */
1815    #endif /* SUPPORT_PCRE16 */
1816    
1817    /* Always flip the bytes in the main data block and study blocks. */
1818    
1819    re->magic_number = REVERSED_MAGIC_NUMBER;
1820    re->size = swap_uint32(re->size);
1821    re->options = swap_uint32(re->options);
1822    re->flags = swap_uint16(re->flags);
1823    re->top_bracket = swap_uint16(re->top_bracket);
1824    re->top_backref = swap_uint16(re->top_backref);
1825    re->first_char = swap_uint16(re->first_char);
1826    re->req_char = swap_uint16(re->req_char);
1827    re->name_table_offset = swap_uint16(re->name_table_offset);
1828    re->name_entry_size = swap_uint16(re->name_entry_size);
1829    re->name_count = swap_uint16(re->name_count);
1830    
1831    if (extra != NULL)
1832      {
1833      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1834      rsd->size = swap_uint32(rsd->size);
1835      rsd->flags = swap_uint32(rsd->flags);
1836      rsd->minlength = swap_uint32(rsd->minlength);
1837      }
1838    
1839    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1840    in the name table, if present, and then in the pattern itself. */
1841    
1842    #ifdef SUPPORT_PCRE16
1843    if (!use_pcre16) return;
1844    
1845    while(TRUE)
1846      {
1847      /* Swap previous characters. */
1848      while (length-- > 0)
1849        {
1850        *ptr = swap_uint16(*ptr);
1851        ptr++;
1852        }
1853    #ifdef SUPPORT_UTF
1854      if (utf16_char)
1855        {
1856        if ((ptr[-1] & 0xfc00) == 0xd800)
1857          {
1858          /* We know that there is only one extra character in UTF-16. */
1859          *ptr = swap_uint16(*ptr);
1860          ptr++;
1861          }
1862        }
1863      utf16_char = FALSE;
1864    #endif /* SUPPORT_UTF */
1865    
1866      /* Get next opcode. */
1867    
1868      length = 0;
1869      op = *ptr;
1870      *ptr++ = swap_uint16(op);
1871    
1872      switch (op)
1873        {
1874        case OP_END:
1875        return;
1876    
1877    #ifdef SUPPORT_UTF
1878        case OP_CHAR:
1879        case OP_CHARI:
1880        case OP_NOT:
1881        case OP_NOTI:
1882        case OP_STAR:
1883        case OP_MINSTAR:
1884        case OP_PLUS:
1885        case OP_MINPLUS:
1886        case OP_QUERY:
1887        case OP_MINQUERY:
1888        case OP_UPTO:
1889        case OP_MINUPTO:
1890        case OP_EXACT:
1891        case OP_POSSTAR:
1892        case OP_POSPLUS:
1893        case OP_POSQUERY:
1894        case OP_POSUPTO:
1895        case OP_STARI:
1896        case OP_MINSTARI:
1897        case OP_PLUSI:
1898        case OP_MINPLUSI:
1899        case OP_QUERYI:
1900        case OP_MINQUERYI:
1901        case OP_UPTOI:
1902        case OP_MINUPTOI:
1903        case OP_EXACTI:
1904        case OP_POSSTARI:
1905        case OP_POSPLUSI:
1906        case OP_POSQUERYI:
1907        case OP_POSUPTOI:
1908        case OP_NOTSTAR:
1909        case OP_NOTMINSTAR:
1910        case OP_NOTPLUS:
1911        case OP_NOTMINPLUS:
1912        case OP_NOTQUERY:
1913        case OP_NOTMINQUERY:
1914        case OP_NOTUPTO:
1915        case OP_NOTMINUPTO:
1916        case OP_NOTEXACT:
1917        case OP_NOTPOSSTAR:
1918        case OP_NOTPOSPLUS:
1919        case OP_NOTPOSQUERY:
1920        case OP_NOTPOSUPTO:
1921        case OP_NOTSTARI:
1922        case OP_NOTMINSTARI:
1923        case OP_NOTPLUSI:
1924        case OP_NOTMINPLUSI:
1925        case OP_NOTQUERYI:
1926        case OP_NOTMINQUERYI:
1927        case OP_NOTUPTOI:
1928        case OP_NOTMINUPTOI:
1929        case OP_NOTEXACTI:
1930        case OP_NOTPOSSTARI:
1931        case OP_NOTPOSPLUSI:
1932        case OP_NOTPOSQUERYI:
1933        case OP_NOTPOSUPTOI:
1934        if (utf) utf16_char = TRUE;
1935    #endif
1936        /* Fall through. */
1937    
1938        default:
1939        length = OP_lengths16[op] - 1;
1940        break;
1941    
1942        case OP_CLASS:
1943        case OP_NCLASS:
1944        /* Skip the character bit map. */
1945        ptr += 32/sizeof(pcre_uint16);
1946        length = 0;
1947        break;
1948    
1949        case OP_XCLASS:
1950        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1951        if (LINK_SIZE > 1)
1952          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1953            - (1 + LINK_SIZE + 1));
1954        else
1955          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1956    
1957        /* Reverse the size of the XCLASS instance. */
1958        *ptr = swap_uint16(*ptr);
1959        ptr++;
1960        if (LINK_SIZE > 1)
1961          {
1962          *ptr = swap_uint16(*ptr);
1963          ptr++;
1964          }
1965    
1966        op = *ptr;
1967        *ptr = swap_uint16(op);
1968        ptr++;
1969        if ((op & XCL_MAP) != 0)
1970          {
1971          /* Skip the character bit map. */
1972          ptr += 32/sizeof(pcre_uint16);
1973          length -= 32/sizeof(pcre_uint16);
1974          }
1975        break;
1976        }
1977      }
1978    /* Control should never reach here in 16 bit mode. */
1979    #endif /* SUPPORT_PCRE16 */
1980    }
1981    
1982    
1983    
# Line 1022  return ((value & 0x000000ff) << 24) | Line 1986  return ((value & 0x000000ff) << 24) |
1986  *************************************************/  *************************************************/
1987    
1988  static int  static int
1989  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1990    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1991    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1992  {  {
# Line 1037  for (;;) Line 2001  for (;;)
2001    {    {
2002    *limit = mid;    *limit = mid;
2003    
2004    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2005      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2006    
2007    if (count == errnumber)    if (count == errnumber)
# Line 1082  Returns: < 0, = 0, or > 0, according Line 2046  Returns: < 0, = 0, or > 0, according
2046  */  */
2047    
2048  static int  static int
2049  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2050  {  {
2051  while (n--)  while (n--)
2052    {    {
# Line 1109  Returns: appropriate PCRE_NEWLINE_x Line 2073  Returns: appropriate PCRE_NEWLINE_x
2073  */  */
2074    
2075  static int  static int
2076  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2077  {  {
2078  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2079  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2080  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2081  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2082  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2083  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2084  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2085  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2086  return 0;  return 0;
2087  }  }
# Line 1139  printf("If input is a terminal, readline Line 2103  printf("If input is a terminal, readline
2103  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2104  #endif  #endif
2105  printf("\nOptions:\n");  printf("\nOptions:\n");
2106  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2107    printf("  -16      use the 16-bit library\n");
2108    #endif
2109    printf("  -b       show compiled code\n");
2110  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2111    printf("  -C arg   show a specific compile-time option\n");
2112    printf("           and exit with its value. The arg can be:\n");
2113    printf("     linksize     internal link size [2, 3, 4]\n");
2114    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2115    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2116    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2117    printf("     ucp          Unicode Properties supported [0, 1]\n");
2118    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2119    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2120  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2121  #if !defined NODFA  #if !defined NODFA
2122  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1155  printf(" -p use POSIX interface\n Line 2131  printf(" -p use POSIX interface\n
2131  #endif  #endif
2132  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2133  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2134  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2135           "  -s+      force each pattern to be studied, using JIT if available\n"
2136         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2137  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2138  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1175  options, followed by a set of test data, Line 2152  options, followed by a set of test data,
2152  int main(int argc, char **argv)  int main(int argc, char **argv)
2153  {  {
2154  FILE *infile = stdin;  FILE *infile = stdin;
2155    const char *version;
2156  int options = 0;  int options = 0;
2157  int study_options = 0;  int study_options = 0;
2158  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1183  int timeit = 0; Line 2161  int timeit = 0;
2161  int timeitm = 0;  int timeitm = 0;
2162  int showinfo = 0;  int showinfo = 0;
2163  int showstore = 0;  int showstore = 0;
2164    int force_study = -1;
2165    int force_study_options = 0;
2166  int quiet = 0;  int quiet = 0;
2167  int size_offsets = 45;  int size_offsets = 45;
2168  int size_offsets_max;  int size_offsets_max;
# Line 1196  int all_use_dfa = 0; Line 2176  int all_use_dfa = 0;
2176  int yield = 0;  int yield = 0;
2177  int stack_size;  int stack_size;
2178    
2179  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
   
 uschar copynames[1024];  
 uschar getnames[1024];  
2180    
2181  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2182  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2183    that 1024 is plenty long enough for the few names we'll be testing. It is
2184    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2185    for the actual memory, to ensure alignment. */
2186    
2187    pcre_uint16 copynames[1024];
2188    pcre_uint16 getnames[1024];
2189    
2190    #ifdef SUPPORT_PCRE16
2191    pcre_uint16 *cn16ptr;
2192    pcre_uint16 *gn16ptr;
2193    #endif
2194    
2195  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2196  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2197    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2198    pcre_uint8 *cn8ptr;
2199    pcre_uint8 *gn8ptr;
2200    #endif
2201    
2202  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2203  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2204  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2205    
2206    buffer = (pcre_uint8 *)malloc(buffer_size);
2207    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2208    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2209    
2210  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2211    
# Line 1225  it set 0x8000, but then I was advised th Line 2220  it set 0x8000, but then I was advised th
2220  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2221  #endif  #endif
2222    
2223    /* Get the version number: both pcre_version() and pcre16_version() give the
2224    same answer. We just need to ensure that we call one that is available. */
2225    
2226    #ifdef SUPPORT_PCRE8
2227    version = pcre_version();
2228    #else
2229    version = pcre16_version();
2230    #endif
2231    
2232  /* Scan options */  /* Scan options */
2233    
2234  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2235    {    {
2236    unsigned char *endptr;    pcre_uint8 *endptr;
2237    
2238    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-m") == 0) showstore = 1;
2239      showstore = 1;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
2240      else if (strcmp(argv[op], "-s+") == 0)
2241        {
2242        force_study = 1;
2243        force_study_options = PCRE_STUDY_JIT_COMPILE;
2244        }
2245      else if (strcmp(argv[op], "-16") == 0)
2246        {
2247    #ifdef SUPPORT_PCRE16
2248        use_pcre16 = 1;
2249    #else
2250        printf("** This version of PCRE was built without 16-bit support\n");
2251        exit(1);
2252    #endif
2253        }
2254    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2255    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
2256    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
# Line 1242  while (argc > 1 && argv[op][0] == '-') Line 2260  while (argc > 1 && argv[op][0] == '-')
2260    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2261  #endif  #endif
2262    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2263        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2264          *endptr == 0))          *endptr == 0))
2265      {      {
2266      op++;      op++;
# Line 1252  while (argc > 1 && argv[op][0] == '-') Line 2270  while (argc > 1 && argv[op][0] == '-')
2270      {      {
2271      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
2272      int temp;      int temp;
2273      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2274                       *endptr == 0))                       *endptr == 0))
2275        {        {
2276        timeitm = temp;        timeitm = temp;
# Line 1263  while (argc > 1 && argv[op][0] == '-') Line 2281  while (argc > 1 && argv[op][0] == '-')
2281      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2282      }      }
2283    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2284        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2285          *endptr == 0))          *endptr == 0))
2286      {      {
2287  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2288      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2289      exit(1);      exit(1);
2290  #else  #else
# Line 1291  while (argc > 1 && argv[op][0] == '-') Line 2309  while (argc > 1 && argv[op][0] == '-')
2309      {      {
2310      int rc;      int rc;
2311      unsigned long int lrc;      unsigned long int lrc;
2312      printf("PCRE version %s\n", pcre_version());  
2313        if (argc > 2)
2314          {
2315          if (strcmp(argv[op + 1], "linksize") == 0)
2316            {
2317            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2318            printf("%d\n", rc);
2319            yield = rc;
2320            goto EXIT;
2321            }
2322          if (strcmp(argv[op + 1], "pcre8") == 0)
2323            {
2324    #ifdef SUPPORT_PCRE8
2325            printf("1\n");
2326            yield = 1;
2327    #else
2328            printf("0\n");
2329            yield = 0;
2330    #endif
2331            goto EXIT;
2332            }
2333          if (strcmp(argv[op + 1], "pcre16") == 0)
2334            {
2335    #ifdef SUPPORT_PCRE16
2336            printf("1\n");
2337            yield = 1;
2338    #else
2339            printf("0\n");
2340            yield = 0;
2341    #endif
2342            goto EXIT;
2343            }
2344          if (strcmp(argv[op + 1], "utf") == 0)
2345            {
2346    #ifdef SUPPORT_PCRE8
2347            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2348            printf("%d\n", rc);
2349            yield = rc;
2350    #else
2351            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2352            printf("%d\n", rc);
2353            yield = rc;
2354    #endif
2355            goto EXIT;
2356            }
2357          if (strcmp(argv[op + 1], "ucp") == 0)
2358            {
2359            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2360            printf("%d\n", rc);
2361            yield = rc;
2362            goto EXIT;
2363            }
2364          if (strcmp(argv[op + 1], "jit") == 0)
2365            {
2366            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2367            printf("%d\n", rc);
2368            yield = rc;
2369            goto EXIT;
2370            }
2371          if (strcmp(argv[op + 1], "newline") == 0)
2372            {
2373            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2374            /* Note that these values are always the ASCII values, even
2375            in EBCDIC environments. CR is 13 and NL is 10. */
2376            printf("%s\n", (rc == 13)? "CR" :
2377              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2378              (rc == -2)? "ANYCRLF" :
2379              (rc == -1)? "ANY" : "???");
2380            goto EXIT;
2381            }
2382          printf("Unknown -C option: %s\n", argv[op + 1]);
2383          goto EXIT;
2384          }
2385    
2386        printf("PCRE version %s\n", version);
2387      printf("Compiled with\n");      printf("Compiled with\n");
2388    
2389    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2390    are set, either both UTFs are supported or both are not supported. */
2391    
2392    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2393        printf("  8-bit and 16-bit support\n");
2394        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2395        if (rc)
2396          printf("  UTF-8 and UTF-16 support\n");
2397        else
2398          printf("  No UTF-8 or UTF-16 support\n");
2399    #elif defined SUPPORT_PCRE8
2400        printf("  8-bit support only\n");
2401      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2402      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2403      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2404        printf("  16-bit support only\n");
2405        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2406        printf("  %sUTF-16 support\n", rc? "" : "No ");
2407    #endif
2408    
2409        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2410      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2411      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2412        if (rc)
2413          {
2414          const char *arch;
2415          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, &arch);
2416          printf("  Just-in-time compiler support: %s\n", arch);
2417          }
2418        else
2419          printf("  No just-in-time compiler support\n");
2420        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2421      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2422      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2423      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2424        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2425        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2426        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2427      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2428      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2429                                       "all Unicode newlines");                                       "all Unicode newlines");
2430      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2431      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2432      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2433      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2434      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2435      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2436      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2437      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2438      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2439      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2440        if (showstore)
2441          {
2442          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2443          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2444          }
2445        printf("\n");
2446      goto EXIT;      goto EXIT;
2447      }      }
2448    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(argv[op], "-help") == 0 ||
# Line 1374  if (argc > 2) Line 2500  if (argc > 2)
2500    
2501  /* Set alternative malloc function */  /* Set alternative malloc function */
2502    
2503    #ifdef SUPPORT_PCRE8
2504  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2505  pcre_free = new_free;  pcre_free = new_free;
2506  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2507  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2508    #endif
2509    
2510    #ifdef SUPPORT_PCRE16
2511    pcre16_malloc = new_malloc;
2512    pcre16_free = new_free;
2513    pcre16_stack_malloc = stack_malloc;
2514    pcre16_stack_free = stack_free;
2515    #endif
2516    
2517  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2518    
2519  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2520    
2521  /* Main loop */  /* Main loop */
2522    
# Line 1396  while (!done) Line 2531  while (!done)
2531  #endif  #endif
2532    
2533    const char *error;    const char *error;
2534    unsigned char *markptr;    pcre_uint8 *markptr;
2535    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2536    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2537    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2538      unsigned long int get_options;
2539    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2540    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2541      int do_allcaps = 0;
2542    int do_mark = 0;    int do_mark = 0;
2543    int do_study = 0;    int do_study = 0;
2544      int no_force_study = 0;
2545    int do_debug = debug;    int do_debug = debug;
2546    int do_G = 0;    int do_G = 0;
2547    int do_g = 0;    int do_g = 0;
2548    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2549    int do_showrest = 0;    int do_showrest = 0;
2550      int do_showcaprest = 0;
2551    int do_flip = 0;    int do_flip = 0;
2552    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2553    
2554    use_utf8 = 0;    use_utf = 0;
2555    debug_lengths = 1;    debug_lengths = 1;
2556    
2557    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1427  while (!done) Line 2566  while (!done)
2566    
2567    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2568      {      {
2569      unsigned long int magic, get_options;      pcre_uint32 magic;
2570      uschar sbuf[8];      pcre_uint8 sbuf[8];
2571      FILE *f;      FILE *f;
2572    
2573      p++;      p++;
2574        if (*p == '!')
2575          {
2576          do_debug = TRUE;
2577          do_showinfo = TRUE;
2578          p++;
2579          }
2580    
2581      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2582      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2583      *pp = 0;      *pp = 0;
# Line 1443  while (!done) Line 2589  while (!done)
2589        continue;        continue;
2590        }        }
2591    
2592        first_gotten_store = 0;
2593      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2594    
2595      true_size =      true_size =
# Line 1450  while (!done) Line 2597  while (!done)
2597      true_study_size =      true_study_size =
2598        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2599    
2600      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2601      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2602    
2603      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2604    
2605      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2606      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2607        {        {
2608        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2609          {          {
2610          do_flip = 1;          do_flip = 1;
2611          }          }
# Line 1470  while (!done) Line 2617  while (!done)
2617          }          }
2618        }        }
2619    
2620      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2621        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2622          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
2623    
2624      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
2625    
2626      if (true_study_size != 0)      if (true_study_size != 0)
2627        {        {
# Line 1494  while (!done) Line 2637  while (!done)
2637          {          {
2638          FAIL_READ:          FAIL_READ:
2639          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2640          if (extra != NULL) new_free(extra);          if (extra != NULL)
2641              {
2642              PCRE_FREE_STUDY(extra);
2643              }
2644          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2645          fclose(f);          fclose(f);
2646          continue;          continue;
# Line 1504  while (!done) Line 2650  while (!done)
2650        }        }
2651      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2652    
2653        /* Flip the necessary bytes. */
2654        if (do_flip)
2655          {
2656          int rc;
2657          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2658          if (rc == PCRE_ERROR_BADMODE)
2659            {
2660            /* Simulate the result of the function call below. */
2661            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2662              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2663            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2664              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2665            continue;
2666            }
2667          }
2668    
2669        /* Need to know if UTF-8 for printing data strings. */
2670    
2671        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2672        use_utf = (get_options & PCRE_UTF8) != 0;
2673    
2674      fclose(f);      fclose(f);
2675      goto SHOW_INFO;      goto SHOW_INFO;
2676      }      }
2677    
2678    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2679    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2680    
2681    delimiter = *p++;    delimiter = *p++;
2682    
# Line 1574  while (!done) Line 2741  while (!done)
2741        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2742        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2743    
2744        case '+': do_showrest = 1; break;        case '+':
2745          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2746          break;
2747    
2748          case '=': do_allcaps = 1; break;
2749        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2750        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2751        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1592  while (!done) Line 2763  while (!done)
2763        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2764  #endif  #endif
2765    
2766        case 'S': do_study = 1; break;        case 'S':
2767          if (do_study == 0)
2768            {
2769            do_study = 1;
2770            if (*pp == '+')
2771              {
2772              study_options |= PCRE_STUDY_JIT_COMPILE;
2773              pp++;
2774              }
2775            }
2776          else
2777            {
2778            do_study = 0;
2779            no_force_study = 1;
2780            }
2781          break;
2782    
2783        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
2784        case 'W': options |= PCRE_UCP; break;        case 'W': options |= PCRE_UCP; break;
2785        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2786        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2787        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2788        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2789        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2790    
2791        case 'T':        case 'T':
# Line 1632  while (!done) Line 2819  while (!done)
2819          goto SKIP_DATA;          goto SKIP_DATA;
2820          }          }
2821        locale_set = 1;        locale_set = 1;
2822        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2823        pp = ppp;        pp = ppp;
2824        break;        break;
2825    
# Line 1645  while (!done) Line 2832  while (!done)
2832    
2833        case '<':        case '<':
2834          {          {
2835          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2836            {            {
2837            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2838            pp += 3;            pp += 3;
# Line 1673  while (!done) Line 2860  while (!done)
2860    
2861    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2862    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2863    local character tables. */    local character tables. Neither does it have 16-bit support. */
2864    
2865  #if !defined NOPOSIX  #if !defined NOPOSIX
2866    if (posix || do_posix)    if (posix || do_posix)
# Line 1689  while (!done) Line 2876  while (!done)
2876      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2877      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2878    
2879        first_gotten_store = 0;
2880      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2881    
2882      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1708  while (!done) Line 2896  while (!done)
2896  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2897    
2898      {      {
2899      unsigned long int get_options;      /* In 16-bit mode, convert the input. */
2900    
2901    #ifdef SUPPORT_PCRE16
2902        if (use_pcre16)
2903          {
2904          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2905            {
2906            case -1:
2907            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2908              "converted to UTF-16\n");
2909            goto SKIP_DATA;
2910    
2911            case -2:
2912            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2913              "cannot be converted to UTF-16\n");
2914            goto SKIP_DATA;
2915    
2916            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2917            fprintf(outfile, "**Failed: character value greater than 0xffff "
2918              "cannot be converted to 16-bit in non-UTF mode\n");
2919            goto SKIP_DATA;
2920    
2921            default:
2922            break;
2923            }
2924          p = (pcre_uint8 *)buffer16;
2925          }
2926    #endif
2927    
2928        /* Compile many times when timing */
2929    
2930      if (timeit > 0)      if (timeit > 0)
2931        {        {
# Line 1717  while (!done) Line 2934  while (!done)
2934        clock_t start_time = clock();        clock_t start_time = clock();
2935        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2936          {          {
2937          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2938          if (re != NULL) free(re);          if (re != NULL) free(re);
2939          }          }
2940        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1726  while (!done) Line 2943  while (!done)
2943            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2944        }        }
2945    
2946      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2947        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2948    
2949      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2950      if non-interactive. */      if non-interactive. */
# Line 1757  while (!done) Line 2975  while (!done)
2975      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
2976      lines. */      lines. */
2977    
2978      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2979      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
2980        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
2981    
2982      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
2983      and remember the store that was got. */      and remember the store that was got. */
2984    
2985      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
2986      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2987    
2988        /* Output code size information if requested */
2989    
2990      /* If /S was present, study the regexp to generate additional info to      if (log_store)
2991      help with the matching. */        fprintf(outfile, "Memory allocation (code space): %d\n",
2992            (int)(first_gotten_store -
2993                  sizeof(REAL_PCRE) -
2994                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
2995    
2996        /* If -s or /S was present, study the regex to generate additional info to
2997        help with the matching, unless the pattern has the SS option, which
2998        suppresses the effect of /S (used for a few test patterns where studying is
2999        never sensible). */
3000    
3001      if (do_study)      if (do_study || (force_study >= 0 && !no_force_study))
3002        {        {
3003        if (timeit > 0)        if (timeit > 0)
3004          {          {
# Line 1787  while (!done) Line 3006  while (!done)
3006          clock_t time_taken;          clock_t time_taken;
3007          clock_t start_time = clock();          clock_t start_time = clock();
3008          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3009            extra = pcre_study(re, study_options, &error);            {
3010              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3011              }
3012          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3013          if (extra != NULL) free(extra);          if (extra != NULL)
3014              {
3015              PCRE_FREE_STUDY(extra);
3016              }
3017          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3018            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3019              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3020          }          }
3021        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3022        if (error != NULL)        if (error != NULL)
3023          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3024        else if (extra != NULL)        else if (extra != NULL)
3025            {
3026          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3027            if (log_store)
3028              {
3029              size_t jitsize;
3030              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3031                  jitsize != 0)
3032                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3033              }
3034            }
3035        }        }
3036    
3037      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1814  while (!done) Line 3047  while (!done)
3047        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3048        }        }
3049    
3050      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3051    
3052      SHOW_INFO:      SHOW_INFO:
3053    
3054      if (do_debug)      if (do_debug)
3055        {        {
3056        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3057        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3058        }        }
3059    
3060      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1866  while (!done) Line 3062  while (!done)
3062      if (do_showinfo)      if (do_showinfo)
3063        {        {
3064        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3065        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3066          hascrorlf;          hascrorlf;
3067        int nameentrysize, namecount;        int nameentrysize, namecount;
3068        const uschar *nametable;        const pcre_uint8 *nametable;
3069    
3070        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3071        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3072        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3073        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3074        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3075        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3076        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3077        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3078        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3079        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3080        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3081              != 0)
3082  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3083    
3084        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3085          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1919  while (!done) Line 3094  while (!done)
3094          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3095          while (namecount-- > 0)          while (namecount-- > 0)
3096            {            {
3097            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3098              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3099              GET2(nametable, 0));  #else
3100              int imm2_size = IMM2_SIZE;
3101    #endif
3102              int length = (int)STRLEN(nametable + imm2_size);
3103              fprintf(outfile, "  ");
3104              PCHARSV(nametable, imm2_size, length, outfile);
3105              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3106    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3107              fprintf(outfile, "%3d\n", use_pcre16?
3108                 (int)(((PCRE_SPTR16)nametable)[0])
3109                :((int)nametable[0] << 8) | (int)nametable[1]);
3110              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3111    #else
3112              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3113    #ifdef SUPPORT_PCRE8
3114            nametable += nameentrysize;            nametable += nameentrysize;
3115    #else
3116              nametable += nameentrysize * 2;
3117    #endif
3118    #endif
3119            }            }
3120          }          }
3121    
3122        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3123        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3124    
3125        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3126        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3127    
3128        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3129          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 1946  while (!done) Line 3139  while (!done)
3139            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3140            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3141            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3142            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3143            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3144            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3145            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3146            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3147    
# Line 1990  while (!done) Line 3183  while (!done)
3183          }          }
3184        else        else
3185          {          {
3186          int ch = first_char & 255;          const char *caseless =
3187          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3188            "" : " (caseless)";            "" : " (caseless)";
3189          if (PRINTHEX(ch))  
3190            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3191              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3192          else          else
3193            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3194              fprintf(outfile, "First char = ");
3195              pchar(first_char, outfile);
3196              fprintf(outfile, "%s\n", caseless);
3197              }
3198          }          }
3199    
3200        if (need_char < 0)        if (need_char < 0)
# Line 2005  while (!done) Line 3203  while (!done)
3203          }          }
3204        else        else
3205          {          {
3206          int ch = need_char & 255;          const char *caseless =
3207          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3208            "" : " (caseless)";            "" : " (caseless)";
3209          if (PRINTHEX(ch))  
3210            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3211              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3212          else          else
3213            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3214              fprintf(outfile, "Need char = ");
3215              pchar(need_char, outfile);
3216              fprintf(outfile, "%s\n", caseless);
3217              }
3218          }          }
3219    
3220        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3221        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3222        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3223        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3224          information unless -i or -d was also present. This means that, except
3225          when auto-callouts are involved, the output from runs with and without
3226          -s should be identical. */
3227    
3228        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3229          {          {
3230          if (extra == NULL)          if (extra == NULL)
3231            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3232          else          else
3233            {            {
3234            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3235            int minlength;            int minlength;
3236    
3237            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3238            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3239    
3240            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3241              {              {
3242              int i;              if (start_bits == NULL)
3243              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3244              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3245                {                {
3246                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3247                  int c = 24;
3248                  fprintf(outfile, "Starting byte set: ");
3249                  for (i = 0; i < 256; i++)
3250                  {                  {
3251                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
                   {  
                   fprintf(outfile, "%c ", i);  
                   c += 2;  
                   }  
                 else  
3252                    {                    {
3253                    fprintf(outfile, "\\x%02x ", i);                    if (c > 75)
3254                    c += 5;                      {
3255                        fprintf(outfile, "\n  ");
3256                        c = 2;
3257                        }
3258                      if (PRINTOK(i) && i != ' ')
3259                        {
3260                        fprintf(outfile, "%c ", i);
3261                        c += 2;
3262                        }
3263                      else
3264                        {
3265                        fprintf(outfile, "\\x%02x ", i);
3266                        c += 5;
3267                        }
3268                    }                    }
3269                  }                  }
3270                  fprintf(outfile, "\n");
3271                }                }
3272              fprintf(outfile, "\n");              }
3273              }
3274    
3275            /* Show this only if the JIT was set by /S, not by -s. */
3276    
3277            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3278              {
3279              int jit;
3280              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3281                {
3282                if (jit)
3283                  fprintf(outfile, "JIT study was successful\n");
3284                else
3285    #ifdef SUPPORT_JIT
3286                  fprintf(outfile, "JIT study was not successful\n");
3287    #else
3288                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3289    #endif
3290              }              }
3291            }            }
3292          }          }
# Line 2079  while (!done) Line 3305  while (!done)
3305          }          }
3306        else        else
3307          {          {
3308          uschar sbuf[8];          pcre_uint8 sbuf[8];
3309          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3310          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3311          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3312          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3313            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3314          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3315          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3316          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3317          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3318            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3319    
3320          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3321              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2097  while (!done) Line 3324  while (!done)
3324            }            }
3325          else          else
3326            {            {
3327            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3328    
3329              /* If there is study data, write it. */
3330    
3331            if (extra != NULL)            if (extra != NULL)
3332              {              {
3333              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 2107  while (!done) Line 3337  while (!done)
3337                  strerror(errno));                  strerror(errno));
3338                }                }
3339              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3340              }              }
3341            }            }
3342          fclose(f);          fclose(f);
3343          }          }
3344    
3345        new_free(re);        new_free(re);
3346        if (extra != NULL) new_free(extra);        if (extra != NULL)
3347            {
3348            PCRE_FREE_STUDY(extra);
3349            }
3350        if (locale_set)        if (locale_set)
3351          {          {
3352          new_free((void *)tables);          new_free((void *)tables);
# Line 2129  while (!done) Line 3361  while (!done)
3361    
3362    for (;;)    for (;;)
3363      {      {
3364      uschar *q;      pcre_uint8 *q;
3365      uschar *bptr;      pcre_uint8 *bptr;
3366      int *use_offsets = offsets;      int *use_offsets = offsets;
3367      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3368      int callout_data = 0;      int callout_data = 0;
# Line 2146  while (!done) Line 3378  while (!done)
3378      int g_notempty = 0;      int g_notempty = 0;
3379      int use_dfa = 0;      int use_dfa = 0;
3380    
     options = 0;  
   
3381      *copynames = 0;      *copynames = 0;
3382      *getnames = 0;      *getnames = 0;
3383    
3384      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3385      getnamesptr = getnames;      cn16ptr = copynames;
3386        gn16ptr = getnames;
3387    #endif
3388    #ifdef SUPPORT_PCRE8
3389        cn8ptr = copynames8;
3390        gn8ptr = getnames8;
3391    #endif
3392    
3393      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3394      first_callout = 1;      first_callout = 1;
3395        last_callout_mark = NULL;
3396      callout_extra = 0;      callout_extra = 0;
3397      callout_count = 0;      callout_count = 0;
3398      callout_fail_count = 999999;      callout_fail_count = 999999;
3399      callout_fail_id = -1;      callout_fail_id = -1;
3400      show_malloc = 0;      show_malloc = 0;
3401        options = 0;
3402    
3403      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3404        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2196  while (!done) Line 3434  while (!done)
3434        int i = 0;        int i = 0;
3435        int n = 0;        int n = 0;
3436    
3437        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3438          In non-UTF mode, allow the value of the byte to fall through to later,
3439          where values greater than 127 are turned into UTF-8 when running in
3440          16-bit mode. */
3441    
3442          if (c != '\\')
3443            {
3444            if (use_utf)
3445              {
3446              *q++ = c;
3447              continue;
3448              }
3449            }
3450    
3451          /* Handle backslash escapes */
3452    
3453          else switch ((c = *p++))
3454          {          {
3455          case 'a': c =    7; break;          case 'a': c =    7; break;
3456          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2212  while (!done) Line 3466  while (!done)
3466          c -= '0';          c -= '0';
3467          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3468            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3469          break;          break;
3470    
3471          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3472          if (*p == '{')          if (*p == '{')
3473            {            {
3474            unsigned char *pt = p;            pcre_uint8 *pt = p;
3475            c = 0;            c = 0;
3476            while (isxdigit(*(++pt)))  
3477              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3478              when isxdigit() is a macro that refers to its argument more than
3479              once. This is banned by the C Standard, but apparently happens in at
3480              least one MacOS environment. */
3481    
3482              for (pt++; isxdigit(*pt); pt++)
3483                {
3484                if (++i == 9)
3485                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3486                                   "using only the first eight.\n");
3487                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3488                }
3489            if (*pt == '}')            if (*pt == '}')
3490              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3491              p = pt + 1;              p = pt + 1;
3492              break;              break;
3493              }              }
3494            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3495            }            }
 #endif  
3496    
3497          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3498            allows UTF-8 characters to be constructed byte by byte, and also allows
3499            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3500            Otherwise, pass it down to later code so that it can be turned into
3501            UTF-8 when running in 16-bit mode. */
3502    
3503          c = 0;          c = 0;
3504          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3505            {            {
3506            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3507            p++;            p++;
3508            }            }
3509            if (use_utf)
3510              {
3511              *q++ = c;
3512              continue;
3513              }
3514          break;          break;
3515    
3516          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2300  while (!done) Line 3543  while (!done)
3543            }            }
3544          else if (isalnum(*p))          else if (isalnum(*p))
3545            {            {
3546            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3547            }            }
3548          else if (*p == '+')          else if (*p == '+')
3549            {            {
# Line 2316  while (!done) Line 3552  while (!done)
3552            }            }
3553          else if (*p == '-')          else if (*p == '-')
3554            {            {
3555            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3556            p++;            p++;
3557            }            }
3558          else if (*p == '!')          else if (*p == '!')
# Line 2370  while (!done) Line 3606  while (!done)
3606            }            }
3607          else if (isalnum(*p))          else if (isalnum(*p))
3608            {            {
3609            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3610            while (isalnum(*p)) *npp++ = *p++;            }
3611            *npp++ = 0;          continue;
3612            *npp = 0;  
3613            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3614            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3615              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3616            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3617                && extra->executable_jit != NULL)
3618              {
3619              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3620              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3621              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3622            }            }
3623          continue;          continue;
3624    
# Line 2473  while (!done) Line 3714  while (!done)
3714            }            }
3715          continue;          continue;
3716          }          }
3717        *q++ = c;  
3718          /* We now have a character value in c that may be greater than 255. In
3719          16-bit mode, we always convert characters to UTF-8 so that values greater
3720          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3721          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3722          mode must have come from \x{...} or octal constructs because values from
3723          \x.. get this far only in non-UTF mode. */
3724    
3725    #if !defined NOUTF || defined SUPPORT_PCRE16
3726          if (use_pcre16 || use_utf)
3727            {
3728            pcre_uint8 buff8[8];
3729            int ii, utn;
3730            utn = ord2utf8(c, buff8);
3731            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3732            }
3733          else
3734    #endif
3735            {
3736            if (c > 255)
3737              {
3738              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3739                "and UTF-8 mode is not enabled.\n", c);
3740              fprintf(outfile, "** Truncation will probably give the wrong "
3741                "result.\n");
3742              }
3743            *q++ = c;
3744            }
3745        }        }
3746    
3747        /* Reached end of subject string */
3748    
3749      *q = 0;      *q = 0;
3750      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3751    
# Line 2536  while (!done) Line 3807  while (!done)
3807            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3808              {              {
3809              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3810              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3811                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3812              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3813              if (i == 0 && do_showrest)              if (do_showcaprest || (i == 0 && do_showrest))
3814                {                {
3815                fprintf(outfile, " 0+ ");                fprintf(outfile, "%2d+ ", (int)i);
3816                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3817                  outfile);                  outfile);
3818                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3819                }                }
# Line 2550  while (!done) Line 3821  while (!done)
3821            }            }
3822          }          }
3823        free(pmatch);        free(pmatch);
3824          goto NEXT_DATA;
3825        }        }
3826    
3827    #endif  /* !defined NOPOSIX */
3828    
3829      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3830    
3831      else  #ifdef SUPPORT_PCRE16
3832  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3833          {
3834          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3835          switch(len)
3836            {
3837            case -1:
3838            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3839              "converted to UTF-16\n");
3840            goto NEXT_DATA;
3841    
3842            case -2:
3843            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3844              "cannot be converted to UTF-16\n");
3845            goto NEXT_DATA;
3846    
3847            case -3:
3848            fprintf(outfile, "**Failed: character value greater than 0xffff "
3849              "cannot be converted to 16-bit in non-UTF mode\n");
3850            goto NEXT_DATA;
3851    
3852            default:
3853            break;
3854            }
3855          bptr = (pcre_uint8 *)buffer16;
3856          }
3857    #endif
3858    
3859      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3860        {        {
# Line 2572  while (!done) Line 3871  while (!done)
3871            {            {
3872            int workspace[1000];            int workspace[1000];
3873            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3874              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3875                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3876                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3877                  (sizeof(workspace)/sizeof(int)));
3878                }
3879            }            }
3880          else          else
3881  #endif  #endif
3882    
3883          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3884            count = pcre_exec(re, extra, (char *)bptr, len,            {
3885              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3886                (options | g_notempty), use_offsets, use_size_offsets);
3887              }
3888          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3889          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3890            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2591  while (!done) Line 3893  while (!done)
3893    
3894        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3895        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3896        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3897          running of pcre_exec(), so disable the JIT optimization. This makes it
3898          possible to run the same set of tests with and without JIT externally
3899          requested. */
3900    
3901        if (find_match_limit)        if (find_match_limit)
3902          {          {
# Line 2600  while (!done) Line 3905  while (!done)
3905            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3906            extra->flags = 0;            extra->flags = 0;
3907            }            }
3908            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3909    
3910          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3911            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2623  while (!done) Line 3929  while (!done)
3929            }            }
3930          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3931          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3932          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3933            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3934          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3935          }          }
# Line 2635  while (!done) Line 3941  while (!done)
3941        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3942          {          {
3943          int workspace[1000];          int workspace[1000];
3944          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3945            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3946            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3947          if (count == 0)          if (count == 0)
3948            {            {
3949            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2648  while (!done) Line 3954  while (!done)
3954    
3955        else        else
3956          {          {
3957          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3958            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3959          if (count == 0)          if (count == 0)
3960            {            {
3961            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2662  while (!done) Line 3968  while (!done)
3968        if (count >= 0)        if (count >= 0)
3969          {          {
3970          int i, maxcount;          int i, maxcount;
3971            void *cnptr, *gnptr;
3972    
3973  #if !defined NODFA  #if !defined NODFA
3974          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2683  while (!done) Line 3990  while (!done)
3990              }              }
3991            }            }
3992    
3993            /* do_allcaps requests showing of all captures in the pattern, to check
3994            unset ones at the end. */
3995    
3996            if (do_allcaps)
3997              {
3998              if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3999                goto SKIP_DATA;
4000              count++;   /* Allow for full match */
4001              if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4002              }
4003    
4004            /* Output the captured substrings */
4005    
4006          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
4007            {            {
4008            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
4009                {
4010                if (use_offsets[i] != -1)
4011                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4012                    use_offsets[i], i);
4013                if (use_offsets[i+1] != -1)
4014                  fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4015                    use_offsets[i+1], i+1);
4016              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
4017                }
4018            else            else
4019              {              {
4020              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4021              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4022                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4023              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4024              if (i == 0)              if (do_showcaprest || (i == 0 && do_showrest))
4025                {                {
4026                if (do_showrest)                fprintf(outfile, "%2d+ ", i/2);
4027                  {                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4028                  fprintf(outfile, " 0+ ");                  outfile);
4029                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                fprintf(outfile, "\n");
                   outfile);  
                 fprintf(outfile, "\n");  
                 }  
4030                }                }
4031              }              }
4032            }            }
4033    
4034          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
4035              {
4036              fprintf(outfile, "MK: ");
4037              PCHARSV(markptr, 0, -1, outfile);
4038              fprintf(outfile, "\n");
4039              }
4040    
4041          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4042            {            {
4043            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
4044              {              {
4045                int rc;
4046              char copybuffer[256];              char copybuffer[256];
4047              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4048                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
4049              if (rc < 0)              if (rc < 0)
4050                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4051              else              else
4052                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4053                  fprintf(outfile, "%2dC ", i);
4054                  PCHARSV(copybuffer, 0, rc, outfile);
4055                  fprintf(outfile, " (%d)\n", rc);
4056                  }
4057              }              }
4058            }            }
4059    
4060          for (copynamesptr = copynames;          cnptr = copynames;
4061               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4062            {            {
4063              int rc;
4064            char copybuffer[256];            char copybuffer[256];
4065            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4066              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4067                {
4068                if (*(pcre_uint16 *)cnptr == 0) break;
4069                }
4070              else
4071                {
4072                if (*(pcre_uint8 *)cnptr == 0) break;
4073                }
4074    
4075              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4076                cnptr, copybuffer, sizeof(copybuffer));
4077    
4078            if (rc < 0)            if (rc < 0)
4079              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4080                fprintf(outfile, "copy substring ");
4081                PCHARSV(cnptr, 0, -1, outfile);
4082                fprintf(outfile, " failed %d\n", rc);
4083                }
4084            else            else
4085              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4086                fprintf(outfile, "  C ");
4087                PCHARSV(copybuffer, 0, rc, outfile);
4088                fprintf(outfile, " (%d) ", rc);
4089                PCHARSV(cnptr, 0, -1, outfile);
4090                putc('\n', outfile);
4091                }
4092    
4093              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4094            }            }
4095    
4096          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4097            {            {
4098            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4099              {              {
4100                int rc;
4101              const char *substring;              const char *substring;
4102              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4103              if (rc < 0)              if (rc < 0)
4104                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4105              else              else
4106                {                {
4107                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4108                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4109                  fprintf(outfile, " (%d)\n", rc);
4110                  PCRE_FREE_SUBSTRING(substring);
4111                }                }
4112              }              }
4113            }            }
4114    
4115          for (getnamesptr = getnames;          gnptr = getnames;
4116               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4117            {            {
4118              int rc;
4119            const char *substring;            const char *substring;
4120            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4121              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4122                {
4123                if (*(pcre_uint16 *)gnptr == 0) break;
4124                }
4125              else
4126                {
4127                if (*(pcre_uint8 *)gnptr == 0) break;
4128                }
4129    
4130              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4131                gnptr, &substring);
4132            if (rc < 0)            if (rc < 0)
4133              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4134                fprintf(outfile, "get substring ");
4135                PCHARSV(gnptr, 0, -1, outfile);
4136                fprintf(outfile, " failed %d\n", rc);
4137                }
4138            else            else
4139              {              {
4140              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4141              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4142                fprintf(outfile, " (%d) ", rc);
4143                PCHARSV(gnptr, 0, -1, outfile);
4144                PCRE_FREE_SUBSTRING(substring);
4145                putc('\n', outfile);
4146              }              }
4147    
4148              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4149            }            }
4150    
4151          if (getlist)          if (getlist)
4152            {            {
4153              int rc;
4154            const char **stringlist;            const char **stringlist;
4155            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4156            if (rc < 0)            if (rc < 0)
4157              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4158            else            else
4159              {              {
4160              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4161                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4162                  fprintf(outfile, "%2dL ", i);
4163                  PCHARSV(stringlist[i], 0, -1, outfile);
4164                  putc('\n', outfile);
4165                  }
4166              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4167                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4168              /* free((void *)stringlist); */              PCRE_FREE_SUBSTRING_LIST(stringlist);
             pcre_free_substring_list(stringlist);  
4169              }              }
4170            }            }
4171          }          }
# Line 2792  while (!done) Line 4175  while (!done)
4175        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4176          {          {
4177          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4178            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4179              {
4180              fprintf(outfile, "Partial match, mark=");
4181              PCHARSV(markptr, 0, -1, outfile);
4182              }
4183          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4184            {            {
4185            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4186            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4187              outfile);              outfile);
4188            }            }
4189          fprintf(outfile, "\n");          fprintf(outfile, "\n");
# Line 2813  while (!done) Line 4200  while (!done)
4200        terminated by CRLF, an advance of one character just passes the \r,        terminated by CRLF, an advance of one character just passes the \r,
4201        whereas we should prefer the longer newline sequence, as does the code in        whereas we should prefer the longer newline sequence, as does the code in
4202        pcre_exec(). Fudge the offset value to achieve this. We check for a        pcre_exec(). Fudge the offset value to achieve this. We check for a
4203        newline setting in the pattern; if none was set, use pcre_config() to        newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4204        find the default.        find the default.
4205    
4206        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
# Line 2824  while (!done) Line 4211  while (!done)
4211          if (g_notempty != 0)          if (g_notempty != 0)
4212            {            {
4213            int onechar = 1;            int onechar = 1;
4214            unsigned int obits = ((real_pcre *)re)->options;            unsigned int obits = ((REAL_PCRE *)re)->options;
4215            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
4216            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4217              {              {
4218              int d;              int d;
4219              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4220              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4221              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4222              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 2843  while (!done) Line 4230  while (!done)
4230                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4231                &&                &&
4232                start_offset < len - 1 &&                start_offset < len - 1 &&
4233                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4234                bptr[start_offset+1] == '\n')                (use_pcre16?
4235                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4236                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4237                  :
4238                       bptr[start_offset] == '\r'
4239                    && bptr[start_offset + 1] == '\n')
4240    #elif defined SUPPORT_PCRE16
4241                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4242                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4243    #else
4244                     bptr[start_offset] == '\r'
4245                  && bptr[start_offset + 1] == '\n'
4246    #endif
4247                  )
4248              onechar++;              onechar++;
4249            else if (use_utf8)            else if (use_utf)
4250              {              {
4251              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4252                {                {
# Line 2858  while (!done) Line 4258  while (!done)
4258            }            }
4259          else          else
4260            {            {
4261            if (count == PCRE_ERROR_NOMATCH)            switch(count)
4262              {              {
4263                case PCRE_ERROR_NOMATCH:
4264              if (gmatched == 0)              if (gmatched == 0)
4265                {                {
4266                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL)
4267                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  {
4268                    fprintf(outfile, "No match\n");
4269                    }
4270                  else
4271                    {
4272                    fprintf(outfile, "No match, mark = ");
4273                    PCHARSV(markptr, 0, -1, outfile);
4274                    putc('\n', outfile);
4275                    }
4276                }                }
4277                break;
4278    
4279                case PCRE_ERROR_BADUTF8:
4280                case PCRE_ERROR_SHORTUTF8:
4281                fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4282                  (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4283                  use_pcre16? "16" : "8");
4284                if (use_size_offsets >= 2)
4285                  fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4286                    use_offsets[1]);
4287                fprintf(outfile, "\n");
4288                break;
4289    
4290                case PCRE_ERROR_BADUTF8_OFFSET:
4291                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4292                  use_pcre16? "16" : "8");
4293                break;
4294    
4295                default:
4296                if (count < 0 &&
4297                    (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4298                  fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4299                else
4300                  fprintf(outfile, "Error %d (Unexpected value)\n", count);
4301                break;
4302              }              }
4303            else fprintf(outfile, "Error %d\n", count);  
4304            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
4305            }            }
4306          }          }
# Line 2898  while (!done) Line 4332  while (!done)
4332    
4333        else        else
4334          {          {
4335          bptr += use_offsets[1];          bptr += use_offsets[1] * CHAR_SIZE;
4336          len -= use_offsets[1];          len -= use_offsets[1];
4337          }          }
4338        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
# Line 2913  while (!done) Line 4347  while (!done)
4347  #endif  #endif
4348    
4349    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
4350    if (extra != NULL) new_free(extra);    if (extra != NULL)
4351        {
4352        PCRE_FREE_STUDY(extra);
4353        }
4354    if (locale_set)    if (locale_set)
4355      {      {
4356      new_free((void *)tables);      new_free((void *)tables);
4357      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
4358      locale_set = 0;      locale_set = 0;
4359      }      }
4360      if (jit_stack != NULL)
4361        {
4362        PCRE_JIT_STACK_FREE(jit_stack);
4363        jit_stack = NULL;
4364        }
4365    }    }
4366    
4367  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 2934  free(dbuffer); Line 4376  free(dbuffer);
4376  free(pbuffer);  free(pbuffer);
4377  free(offsets);  free(offsets);
4378    
4379    #ifdef SUPPORT_PCRE16
4380    if (buffer16 != NULL) free(buffer16);
4381    #endif
4382    
4383  return yield;  return yield;
4384  }  }
4385    

Legend:
Removed from v.580  
changed lines
  Added in v.904

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12