/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 735 by ph10, Thu Oct 13 15:51:27 2011 UTC revision 838 by ph10, Thu Dec 29 18:27:07 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 105  here before pcre_internal.h so that the Line 116  here before pcre_internal.h so that the
116  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
117    
118  #include "pcre.h"  #include "pcre.h"
119    
120    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
121    /* Configure internal macros to 16 bit mode. */
122    #define COMPILE_PCRE16
123    #endif
124    
125  #include "pcre_internal.h"  #include "pcre_internal.h"
126    
127    /* The pcre_printint() function, which prints the internal form of a compiled
128    regex, is held in a separate file so that (a) it can be compiled in either
129    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
130    when that is compiled in debug mode. */
131    
132    #ifdef SUPPORT_PCRE8
133    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
134    #endif
135    #ifdef SUPPORT_PCRE16
136    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
137    #endif
138    
139  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
140  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
141  external symbols to prevent clashes. */  external symbols to prevent clashes. */
142    
143  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
144  #define _pcre_ucp_typerange    ucp_typerange  #undef PRIV
145  #define _pcre_utf8_table1      utf8_table1  #define PRIV(name) name
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utf8_char_sizes  utf8_char_sizes  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
146    
147  #include "pcre_tables.c"  #include "pcre_tables.c"
148    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
149  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
150  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
151  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
152  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
153  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
154    
155    #ifdef EBCDIC
156    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
157    #else
158    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
159    #endif
160    
161    #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
162    
163  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  /* Posix support is disabled in 16 bit only mode. */
164    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
165    #define NOPOSIX
166    #endif
167    
168  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
169  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 150  Makefile. */ Line 173  Makefile. */
173  #include "pcreposix.h"  #include "pcreposix.h"
174  #endif  #endif
175    
176  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
177  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
178  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
179  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
180  UTF8 support if PCRE is built without it. */  
181    #ifndef SUPPORT_UTF
182  #ifndef SUPPORT_UTF8  #ifndef NOUTF
183  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
184  #endif  #endif
185  #endif  #endif
186    
187    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
188    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
189    only from one place and is handled differently). I couldn't dream up any way of
190    using a single macro to do this in a generic way, because of the many different
191    argument requirements. We know that at least one of SUPPORT_PCRE8 and
192    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
193    use these in the definitions of generic macros.
194    
195    **** Special note about the PCHARSxxx macros: the address of the string to be
196    printed is always given as two arguments: a base address followed by an offset.
197    The base address is cast to the correct data size for 8 or 16 bit data; the
198    offset is in units of this size. If the string were given as base+offset in one
199    argument, the casting might be incorrectly applied. */
200    
201    #ifdef SUPPORT_PCRE8
202    
203    #define PCHARS8(lv, p, offset, len, f) \
204      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
205    
206    #define PCHARSV8(p, offset, len, f) \
207      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
208    
209    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
210      p = read_capture_name8(p, cn8, re)
211    
212    #define SET_PCRE_CALLOUT8(callout) \
213      pcre_callout = callout
214    
215    #define STRLEN8(p) ((int)strlen((char *)p))
216    
217    
218    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
219      re = pcre_compile((char *)pat, options, error, erroffset, tables)
220    
221    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
222        namesptr, cbuffer, size) \
223      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
224        (char *)namesptr, cbuffer, size)
225    
226    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
227      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
228    
229    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
230        offsets, size_offsets, workspace, size_workspace) \
231      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
232        offsets, size_offsets, workspace, size_workspace)
233    
234    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
235        offsets, size_offsets) \
236      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
237        offsets, size_offsets)
238    
239    #define PCRE_FREE_STUDY8(extra) \
240      pcre_free_study(extra)
241    
242    #define PCRE_FREE_SUBSTRING8(substring) \
243      pcre_free_substring(substring)
244    
245    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
246      pcre_free_substring_list(listptr)
247    
248    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
249        getnamesptr, subsptr) \
250      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
251        (char *)getnamesptr, subsptr)
252    
253    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
254      n = pcre_get_stringnumber(re, (char *)ptr)
255    
256    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
257      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
258    
259    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
260      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
261    
262    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
263      pcre_pattern_to_host_byte_order(re, extra, tables)
264    
265    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
266      pcre_printint(re, outfile, debug_lengths)
267    
268    #define PCRE_STUDY8(extra, re, options, error) \
269      extra = pcre_study(re, options, error)
270    
271    #endif /* SUPPORT_PCRE8 */
272    
273    /* -----------------------------------------------------------*/
274    
275    #ifdef SUPPORT_PCRE16
276    
277    #define PCHARS16(lv, p, offset, len, f) \
278      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
279    
280    #define PCHARSV16(p, offset, len, f) \
281      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
282    
283    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
284      p = read_capture_name16(p, cn16, re)
285    
286    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
287    
288    #define SET_PCRE_CALLOUT16(callout) \
289      pcre16_callout = callout
290    
291    
292    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
293      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
294    
295    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
296        namesptr, cbuffer, size) \
297      rc = pcre16_copy_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
298        (PCRE_SPTR16)namesptr, (PCRE_SCHAR16 *)cbuffer, size/2)
299    
300    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
301      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
302        (PCRE_SCHAR16 *)cbuffer, size/2)
303    
304    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
305        offsets, size_offsets, workspace, size_workspace) \
306      count = pcre16_dfa_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
307        options, offsets, size_offsets, workspace, size_workspace)
308    
309    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
310        offsets, size_offsets) \
311      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
312        options, offsets, size_offsets)
313    
314    #define PCRE_FREE_STUDY16(extra) \
315      pcre16_free_study(extra)
316    
317    #define PCRE_FREE_SUBSTRING16(substring) \
318      pcre16_free_substring((PCRE_SPTR16)substring)
319    
320    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
321      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
322    
323    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
324        getnamesptr, subsptr) \
325      rc = pcre16_get_named_substring(re, (PCRE_SPTR16)bptr, offsets, count, \
326        (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
327    
328    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
329      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
330    
331    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
332      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
333        (PCRE_SPTR16 *)(void*)subsptr)
334    
335    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
336      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
337        (PCRE_SPTR16 **)(void*)listptr)
338    
339    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
340      pcre16_pattern_to_host_byte_order(re, extra, tables)
341    
342    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
343      pcre16_printint(re, outfile, debug_lengths)
344    
345    #define PCRE_STUDY16(extra, re, options, error) \
346      extra = pcre16_study(re, options, error)
347    
348    #endif /* SUPPORT_PCRE16 */
349    
350    
351    /* ----- Both modes are supported; a runtime test is needed, except for
352    pcre_config(), and the JIT stack functions, when it doesn't matter which
353    version is called. ----- */
354    
355    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
356    
357    #define CHAR_SIZE (use_pcre16? 2:1)
358    
359    #define PCHARS(lv, p, offset, len, f) \
360      if (use_pcre16) \
361        PCHARS16(lv, p, offset, len, f); \
362      else \
363        PCHARS8(lv, p, offset, len, f)
364    
365    #define PCHARSV(p, offset, len, f) \
366      if (use_pcre16) \
367        PCHARSV16(p, offset, len, f); \
368      else \
369        PCHARSV8(p, offset, len, f)
370    
371    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
372      if (use_pcre16) \
373        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
374      else \
375        READ_CAPTURE_NAME8(p, cn8, cn16, re)
376    
377    #define SET_PCRE_CALLOUT(callout) \
378      if (use_pcre16) \
379        SET_PCRE_CALLOUT16(callout); \
380      else \
381        SET_PCRE_CALLOUT8(callout)
382    
383    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
384    
385    #define PCRE_ASSIGN_JIT_STACK pcre_assign_jit_stack
386    
387    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
388      if (use_pcre16) \
389        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
390      else \
391        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
392    
393    #define PCRE_CONFIG pcre_config
394    
395    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
396        namesptr, cbuffer, size) \
397      if (use_pcre16) \
398        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
399          namesptr, cbuffer, size); \
400      else \
401        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
402          namesptr, cbuffer, size)
403    
404    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
405      if (use_pcre16) \
406        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
407      else \
408        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
409    
410    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
411        offsets, size_offsets, workspace, size_workspace) \
412      if (use_pcre16) \
413        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
414          offsets, size_offsets, workspace, size_workspace); \
415      else \
416        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
417          offsets, size_offsets, workspace, size_workspace)
418    
419    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
420        offsets, size_offsets) \
421      if (use_pcre16) \
422        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
423          offsets, size_offsets); \
424      else \
425        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
426          offsets, size_offsets)
427    
428    #define PCRE_FREE_STUDY(extra) \
429      if (use_pcre16) \
430        PCRE_FREE_STUDY16(extra); \
431      else \
432        PCRE_FREE_STUDY8(extra)
433    
434    #define PCRE_FREE_SUBSTRING(substring) \
435      if (use_pcre16) \
436        PCRE_FREE_SUBSTRING16(substring); \
437      else \
438        PCRE_FREE_SUBSTRING8(substring)
439    
440    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
441      if (use_pcre16) \
442        PCRE_FREE_SUBSTRING_LIST16(listptr); \
443      else \
444        PCRE_FREE_SUBSTRING_LIST8(listptr)
445    
446    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
447        getnamesptr, subsptr) \
448      if (use_pcre16) \
449        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
450          getnamesptr, subsptr); \
451      else \
452        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
453          getnamesptr, subsptr)
454    
455    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
456      if (use_pcre16) \
457        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
458      else \
459        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
460    
461    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
462      if (use_pcre16) \
463        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
464      else \
465        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
466    
467    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
468      if (use_pcre16) \
469        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
470      else \
471        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
472    
473    #define PCRE_JIT_STACK_ALLOC pcre_jit_stack_alloc
474    #define PCRE_JIT_STACK_FREE pcre_jit_stack_free
475    
476    #define PCRE_MAKETABLES \
477      (use_pcre16? pcre16_maketables() : pcre_maketables())
478    
479    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
480      if (use_pcre16) \
481        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
482      else \
483        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
484    
485    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
486      if (use_pcre16) \
487        PCRE_PRINTINT16(re, outfile, debug_lengths); \
488      else \
489        PCRE_PRINTINT8(re, outfile, debug_lengths)
490    
491    #define PCRE_STUDY(extra, re, options, error) \
492      if (use_pcre16) \
493        PCRE_STUDY16(extra, re, options, error); \
494      else \
495        PCRE_STUDY8(extra, re, options, error)
496    
497    /* ----- Only 8-bit mode is supported ----- */
498    
499    #elif defined SUPPORT_PCRE8
500    #define CHAR_SIZE                 1
501    #define PCHARS                    PCHARS8
502    #define PCHARSV                   PCHARSV8
503    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
504    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
505    #define STRLEN                    STRLEN8
506    #define PCRE_ASSIGN_JIT_STACK     pcre_assign_jit_stack
507    #define PCRE_COMPILE              PCRE_COMPILE8
508    #define PCRE_CONFIG               pcre_config
509    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
510    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
511    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
512    #define PCRE_EXEC                 PCRE_EXEC8
513    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
514    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
515    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
516    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
517    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
518    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
519    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
520    #define PCRE_JIT_STACK_ALLOC      pcre_jit_stack_alloc
521    #define PCRE_JIT_STACK_FREE       pcre_jit_stack_free
522    #define PCRE_MAKETABLES           pcre_maketables()
523    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
524    #define PCRE_PRINTINT             PCRE_PRINTINT8
525    #define PCRE_STUDY                PCRE_STUDY8
526    
527    /* ----- Only 16-bit mode is supported ----- */
528    
529    #else
530    #define CHAR_SIZE                 2
531    #define PCHARS                    PCHARS16
532    #define PCHARSV                   PCHARSV16
533    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
534    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
535    #define STRLEN                    STRLEN16
536    #define PCRE_ASSIGN_JIT_STACK     pcre16_assign_jit_stack
537    #define PCRE_COMPILE              PCRE_COMPILE16
538    #define PCRE_CONFIG               pcre16_config
539    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
540    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
541    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
542    #define PCRE_EXEC                 PCRE_EXEC16
543    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
544    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
545    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
546    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
547    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
548    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
549    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
550    #define PCRE_JIT_STACK_ALLOC      pcre16_jit_stack_alloc
551    #define PCRE_JIT_STACK_FREE       pcre16_jit_stack_free
552    #define PCRE_MAKETABLES           pcre16_maketables()
553    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
554    #define PCRE_PRINTINT             PCRE_PRINTINT16
555    #define PCRE_STUDY                PCRE_STUDY16
556    #endif
557    
558    /* ----- End of mode-specific function call macros ----- */
559    
560    
561  /* Other parameters */  /* Other parameters */
562    
# Line 189  static int debug_lengths; Line 584  static int debug_lengths;
584  static int first_callout;  static int first_callout;
585  static int locale_set = 0;  static int locale_set = 0;
586  static int show_malloc;  static int show_malloc;
587  static int use_utf8;  static int use_utf;
588  static size_t gotten_store;  static size_t gotten_store;
589    static size_t first_gotten_store = 0;
590  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
591    
592  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
593    
594  static int buffer_size = 50000;  static int buffer_size = 50000;
595  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
596  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
597  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
598    
599    /* Another buffer is needed for translation to 16-bit character strings. It
600    will be obtained and extended as required. */
601    
602    #ifdef SUPPORT_PCRE16
603    static int buffer16_size = 0;
604    static pcre_uint16 *buffer16 = NULL;
605    
606    #ifdef SUPPORT_PCRE8
607    
608    /* We need the table of operator lengths that is used for 16-bit compiling, in
609    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
610    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
611    appropriately for the 16-bit world. Just as a safety check, make sure that
612    COMPILE_PCRE16 is *not* set. */
613    
614    #ifdef COMPILE_PCRE16
615    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
616    #endif
617    
618    #if LINK_SIZE == 2
619    #undef LINK_SIZE
620    #define LINK_SIZE 1
621    #elif LINK_SIZE == 3 || LINK_SIZE == 4
622    #undef LINK_SIZE
623    #define LINK_SIZE 2
624    #else
625    #error LINK_SIZE must be either 2, 3, or 4
626    #endif
627    
628    #endif /* SUPPORT_PCRE8 */
629    
630    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
631    #endif  /* SUPPORT_PCRE16 */
632    
633    /* If we have 8-bit support, default use_pcre16 to false; if there is also
634    16-bit support, it can be changed by an option. If there is no 8-bit support,
635    there must be 16-bit support, so default it to 1. */
636    
637    #ifdef SUPPORT_PCRE8
638    static int use_pcre16 = 0;
639    #else
640    static int use_pcre16 = 1;
641    #endif
642    
643  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
644    
# Line 213  static const char *errtexts[] = { Line 653  static const char *errtexts[] = {
653    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
654    "match limit exceeded",    "match limit exceeded",
655    "callout error code",    "callout error code",
656    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
657    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
658    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
659    "not used - internal error",    "not used - internal error",
660    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 228  static const char *errtexts[] = { Line 668  static const char *errtexts[] = {
668    "not used - internal error",    "not used - internal error",
669    "invalid combination of newline options",    "invalid combination of newline options",
670    "bad offset value",    "bad offset value",
671    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
672    "nested recursion at the same subject position",    "nested recursion at the same subject position",
673    "JIT stack limit reached"    "JIT stack limit reached",
674      "pattern compiled in wrong mode: 8-bit/16-bit error"
675  };  };
676    
677    
# Line 246  the L (locale) option also adjusts the t Line 687  the L (locale) option also adjusts the t
687  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
688  only ASCII characters. */  only ASCII characters. */
689    
690  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
691    
692  /* This table is a lower casing table. */  /* This table is a lower casing table. */
693    
# Line 419  graph, print, punct, and cntrl. Other cl Line 860  graph, print, punct, and cntrl. Other cl
860  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
861  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
862    
863  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
864  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
865  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
866  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 592  return (pcre_jit_stack *)arg; Line 1033  return (pcre_jit_stack *)arg;
1033  }  }
1034    
1035    
1036    #if !defined NOUTF || defined SUPPORT_PCRE16
1037    /*************************************************
1038    *            Convert UTF-8 string to value       *
1039    *************************************************/
1040    
1041    /* This function takes one or more bytes that represent a UTF-8 character,
1042    and returns the value of the character.
1043    
1044    Argument:
1045      utf8bytes   a pointer to the byte vector
1046      vptr        a pointer to an int to receive the value
1047    
1048    Returns:      >  0 => the number of bytes consumed
1049                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1050    */
1051    
1052    static int
1053    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1054    {
1055    int c = *utf8bytes++;                  /* lead byte of the sequence */
1056    int d = c;                             /* working copy, shifted left to count leading 1 bits */
1057    int i, j, s;
1058    
1059    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1060      {
1061      if ((d & 0x80) == 0) break;          /* stop at the first 0 bit below the leading 1 bits */
1062      d <<= 1;
1063      }
1064    
1065    if (i == -1) { *vptr = c; return 1; }  /* ascii character: top bit clear, one byte consumed */
1066    if (i == 0 || i == 6) return 0;        /* invalid UTF-8: lead byte is 10xxxxxx, or has 7 or more leading 1 bits */
1067    
1068    /* i now has a value in the range 1-5 */
1069    
1070    s = 6*i;                               /* each additional byte contributes 6 bits */
1071    d = (c & utf8_table3[i]) << s;         /* presumably utf8_table3[i] masks the lead byte's data bits - confirm in pcre_tables.c */
1072    
1073    for (j = 0; j < i; j++)
1074      {
1075      c = *utf8bytes++;
1076      if ((c & 0xc0) != 0x80) return -(j+1);  /* not a 10xxxxxx continuation byte; -(offset of bad byte) */
1077      s -= 6;
1078      d |= (c & 0x3f) << s;                /* merge the 6 data bits of this continuation byte */
1079      }
1080    
1081    /* Check that encoding was the correct unique one: the decoded value must
   first fit at the utf8_table1 index equal to the number of additional bytes,
   otherwise a different length should have been used. */
1082    
1083    for (j = 0; j < utf8_table1_size; j++)
1084      if (d <= utf8_table1[j]) break;
1085    if (j != i) return -(i+1);             /* wrong length for this value: negated total length */
1086    
1087    /* Valid value */
1088    
1089    *vptr = d;
1090    return i+1;                            /* lead byte plus i additional bytes consumed */
1091    }
1092    #endif /* NOUTF || SUPPORT_PCRE16 */
1093    
1094    
1095    
1096    #if !defined NOUTF || defined SUPPORT_PCRE16
1097    /*************************************************
1098    *       Convert character value to UTF-8         *
1099    *************************************************/
1100    
1101    /* This function takes an integer value in the range 0 - 0x7fffffff
1102    and encodes it as a UTF-8 character in 1 to 6 bytes.
1103    
1104    Arguments:
1105      cvalue     the character value
1106      utf8bytes  pointer to buffer for result - at least 6 bytes long
1107    
1108    Returns:     number of bytes placed in the buffer
1109    */
1110    
1111    static int
1112    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1113    {
1114    register int i, j;
1115    for (i = 0; i < utf8_table1_size; i++)   /* find i = number of additional bytes needed */
1116      if (cvalue <= utf8_table1[i]) break;
1117    utf8bytes += i;                          /* point at the last byte of the sequence; filled backwards */
1118    for (j = i; j > 0; j--)
1119     {
1120     *utf8bytes-- = 0x80 | (cvalue & 0x3f);  /* continuation byte: 10xxxxxx carrying the low 6 bits */
1121     cvalue >>= 6;
1122     }
1123    *utf8bytes = utf8_table2[i] | cvalue;    /* lead byte: utf8_table2[i] (presumably the length-marker bits - confirm in pcre_tables.c) ORed with the remaining high bits */
1124    return i + 1;                            /* total bytes written: lead byte plus i continuation bytes */
1125    }
1126    #endif /* NOUTF || SUPPORT_PCRE16 */
1127    
1128    
1129    
1130    #ifdef SUPPORT_PCRE16
1131    /*************************************************
1132    *         Convert a string to 16-bit             *
1133    *************************************************/
1134    
1135    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1136    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1137    double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
1138    in UTF-16. Higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
1139    result is always left in buffer16.
1140    
1141    Note that this function does not object to surrogate values. This is
1142    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1143    for the purpose of testing that they are correctly faulted.
1144    
1145    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1146    in UTF-8 so that values greater than 255 can be handled.
1147    
1148    Arguments:
1149      data       TRUE if converting a data line; FALSE for a regex
1150      p          points to a byte string
1151      utf        true if UTF-8 (to be converted to UTF-16)
1152      len        number of bytes in the string (excluding trailing zero)
1153    
1154    Returns:     number of 16-bit data items used (excluding trailing zero)
1155                 OR -1 if a UTF-8 string is malformed
1156                 OR -2 if a value > 0x10ffff is encountered
1157                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1158    */
1159    
1160    static int
1161    to16(int data, pcre_uint8 *p, int utf, int len)
1162    {
1163    pcre_uint16 *pp;
1164    
1165    if (buffer16_size < 2*len + 2)
1166      {
1167      if (buffer16 != NULL) free(buffer16);
1168      buffer16_size = 2*len + 2;
1169      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1170      if (buffer16 == NULL)
1171        {
1172        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1173        exit(1);
1174        }
1175      }
1176    
1177    pp = buffer16;
1178    
1179    if (!utf && !data)
1180      {
1181      while (len-- > 0) *pp++ = *p++;
1182      }
1183    
1184    else
1185      {
1186      int c = 0;
1187      while (len > 0)
1188        {
1189        int chlen = utf82ord(p, &c);
1190        if (chlen <= 0) return -1;
1191        if (c > 0x10ffff) return -2;
1192        p += chlen;
1193        len -= chlen;
1194        if (c < 0x10000) *pp++ = c; else
1195          {
1196          if (!utf) return -3;
1197          c -= 0x10000;
1198          *pp++ = 0xD800 | (c >> 10);
1199          *pp++ = 0xDC00 | (c & 0x3ff);
1200          }
1201        }
1202      }
1203    
1204    *pp = 0;
1205    return pp - buffer16;
1206    }
1207    #endif
1208    
1209    
1210  /*************************************************  /*************************************************
1211  *        Read or extend an input line            *  *        Read or extend an input line            *
1212  *************************************************/  *************************************************/
# Line 615  Returns: pointer to the start of n Line 1230  Returns: pointer to the start of n
1230                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1231  */  */
1232    
1233  static uschar *  static pcre_uint8 *
1234  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1235  {  {
1236  uschar *here = start;  pcre_uint8 *here = start;
1237    
1238  for (;;)  for (;;)
1239    {    {
# Line 665  for (;;) Line 1280  for (;;)
1280    else    else
1281      {      {
1282      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1283      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1284      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1285      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1286    
1287      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1288        {        {
# Line 698  return NULL; /* Control never gets here Line 1313  return NULL; /* Control never gets here
1313    
1314    
1315    
   
   
   
   
1316  /*************************************************  /*************************************************
1317  *          Read number from string               *  *          Read number from string               *
1318  *************************************************/  *************************************************/
# Line 718  Returns: the unsigned long Line 1329  Returns: the unsigned long
1329  */  */
1330    
1331  static int  static int
1332  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1333  {  {
1334  int result = 0;  int result = 0;
1335  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 729  return(result); Line 1340  return(result);
1340    
1341    
1342    
   
1343  /*************************************************  /*************************************************
1344  *            Convert UTF-8 string to value       *  *             Print one character                *
1345  *************************************************/  *************************************************/
1346    
1347  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
1348    
1349  Argument:  static int pchar(int c, FILE *f)
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1350  {  {
1351  int c = *utf8bytes++;  if (PRINTOK(c))
1352  int d = c;    {
1353  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1354      return 1;
1355      }
1356    
1357  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1358    {    {
1359    if ((d & 0x80) == 0) break;    if (use_utf)
1360    d <<= 1;      {
1361        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1362        return 6;
1363        }
1364      else
1365        {
1366        if (f != NULL) fprintf(f, "\\x%02x", c);
1367        return 4;
1368        }
1369    }    }
1370    
1371  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1372  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1373           (c <= 0x00000fff)? 7 :
1374           (c <= 0x0000ffff)? 8 :
1375           (c <= 0x000fffff)? 9 : 10;
1376    }
1377    
 /* i now has a value in the range 1-5 */  
1378    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1379    
1380  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1381    {  /*************************************************
1382    c = *utf8bytes++;  *         Print 8-bit character string           *
1383    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1384    
1385  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1386    If handed a NULL file, just counts chars without printing. */
1387    
1388  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1389    if (d <= utf8_table1[j]) break;  {
1390  if (j != i) return -(i+1);  int c = 0;
1391    int yield = 0;
1392    
1393  /* Valid value */  if (length < 0)
1394      length = strlen((char *)p);
1395    
1396  *vptr = d;  while (length-- > 0)
1397  return i+1;    {
1398  }  #if !defined NOUTF
1399      if (use_utf)
1400        {
1401        int rc = utf82ord(p, &c);
1402        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1403          {
1404          length -= rc - 1;
1405          p += rc;
1406          yield += pchar(c, f);
1407          continue;
1408          }
1409        }
1410    #endif
1411      c = *p++;
1412      yield += pchar(c, f);
1413      }
1414    
1415    return yield;
1416    }
1417  #endif  #endif
1418    
1419    
1420    
1421    #ifdef SUPPORT_PCRE16
1422  /*************************************************  /*************************************************
1423  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1424  *************************************************/  *************************************************/
1425    
1426  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1427  {  {
1428  register int i, j;  int len = 0;
1429  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1430    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1431  }  }
1432    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1433    
1434    
1435    #ifdef SUPPORT_PCRE16
1436  /*************************************************  /*************************************************
1437  *             Print character string             *  *           Print 16-bit character string        *
1438  *************************************************/  *************************************************/
1439    
1440  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1441  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1442    
1443  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1444  {  {
 int c = 0;  
1445  int yield = 0;  int yield = 0;
1446    
1447    if (length < 0)
1448      length = strlen16(p);
1449    
1450  while (length-- > 0)  while (length-- > 0)
1451    {    {
1452  #if !defined NOUTF8    int c = *p++ & 0xffff;
1453    if (use_utf8)  #if !defined NOUTF
1454      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1455      {      {
1456      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1457        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1458        {        {
1459        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1460        p += rc;        length--;
1461        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1462        }        }
1463      }      }
1464  #endif  #endif
1465      yield += pchar(c, f);
1466      }
1467    
1468     /* Not UTF-8, or malformed UTF-8  */  return yield;
1469    }
1470    #endif  /* SUPPORT_PCRE16 */
1471    
1472    c = *p++;  
1473    if (PRINTHEX(c))  
1474      {  #ifdef SUPPORT_PCRE8
1475      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1476      yield++;  *     Read a capture name (8-bit) and check it   *
1477      }  *************************************************/
1478    else  
1479      {  static pcre_uint8 *
1480      if (f != NULL) fprintf(f, "\\x%02x", c);  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1481      yield += 4;  {
1482      }  pcre_uint8 *npp = *pp;
1483    while (isalnum(*p)) *npp++ = *p++;
1484    *npp++ = 0;
1485    *npp = 0;
1486    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1487      {
1488      fprintf(outfile, "no parentheses with name \"");
1489      PCHARSV(*pp, 0, -1, outfile);
1490      fprintf(outfile, "\"\n");
1491    }    }
1492    
1493  return yield;  *pp = npp;
1494    return p;
1495  }  }
1496    #endif  /* SUPPORT_PCRE8 */
1497    
1498    
1499    
1500    #ifdef SUPPORT_PCRE16
1501    /*************************************************
1502    *     Read a capture name (16-bit) and check it  *
1503    *************************************************/
1504    
1505    /* Note that the text being read is 8-bit. */
1506    
1507    static pcre_uint8 *
1508    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1509    {
1510    pcre_uint16 *npp = *pp;
1511    while (isalnum(*p)) *npp++ = *p++;
1512    *npp++ = 0;
1513    *npp = 0;
1514    if (pcre16_get_stringnumber(re, (PCRE_SPTR16)(*pp)) < 0)
1515      {
1516      fprintf(outfile, "no parentheses with name \"");
1517      PCHARSV(*pp, 0, -1, outfile);
1518      fprintf(outfile, "\"\n");
1519      }
1520    *pp = npp;
1521    return p;
1522    }
1523    #endif  /* SUPPORT_PCRE16 */
1524    
1525    
1526    
# Line 916  if (callout_extra) Line 1549  if (callout_extra)
1549      else      else
1550        {        {
1551        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1552        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1553          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1554        fprintf(f, "\n");        fprintf(f, "\n");
1555        }        }
# Line 929  printed lengths of the substrings. */ Line 1562  printed lengths of the substrings. */
1562    
1563  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1564    
1565  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1566  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1567    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1568    
1569  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1570    
1571  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1572    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1573    
1574  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 974  first_callout = 0; Line 1607  first_callout = 0;
1607    
1608  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
1609    {    {
1610    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
1611      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
1612      else
1613        {
1614        fprintf(outfile, "Latest Mark: ");
1615        PCHARSV(cb->mark, 0, -1, outfile);
1616        putc('\n', outfile);
1617        }
1618    last_callout_mark = cb->mark;    last_callout_mark = cb->mark;
1619    }    }
1620    
# Line 999  return (cb->callout_number != callout_fa Line 1638  return (cb->callout_number != callout_fa
1638  *************************************************/  *************************************************/
1639    
1640  /* Alternative malloc function, to test functionality and save the size of a  /* Alternative malloc function, to test functionality and save the size of a
1641  compiled re. The show_malloc variable is set only during matching. */  compiled re, which is the first store request that pcre_compile() makes. The
1642    show_malloc variable is set only during matching. */
1643    
1644  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1645  {  {
1646  void *block = malloc(size);  void *block = malloc(size);
1647  gotten_store = size;  gotten_store = size;
1648    if (first_gotten_store == 0) first_gotten_store = size;
1649  if (show_malloc)  if (show_malloc)
1650    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1651  return block;  return block;
# Line 1036  free(block); Line 1677  free(block);
1677    
1678    
1679  /*************************************************  /*************************************************
1680  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1681    *************************************************/
1682    
1683    /* Get one piece of information from the pcre_fullinfo() function. When only
1684    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1685    value, but the code is defensive.
1686    
1687    Arguments:
1688      re        compiled regex
1689      study     study data
1690      option    PCRE_INFO_xxx option
1691      ptr       where to put the data
1692    
1693    Returns:    0 when OK, < 0 on error
1694    */
1695    
1696    static int
1697    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1698    {
1699    int rc;
1700    
1701    if (use_pcre16)
1702    #ifdef SUPPORT_PCRE16
1703      rc = pcre16_fullinfo(re, study, option, ptr);
1704    #else
1705      rc = PCRE_ERROR_BADMODE;
1706    #endif
1707    else
1708    #ifdef SUPPORT_PCRE8
1709      rc = pcre_fullinfo(re, study, option, ptr);
1710    #else
1711      rc = PCRE_ERROR_BADMODE;
1712    #endif
1713    
1714    if (rc < 0)
1715      {
1716      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1717        use_pcre16? "16" : "", option);
1718      if (rc == PCRE_ERROR_BADMODE)
1719        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1720          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1721      }
1722    
1723    return rc;
1724    }
1725    
1726    
1727    
1728    /*************************************************
1729    *             Swap byte functions                *
1730    *************************************************/
1731    
1732    /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1733    value, respectively.
1734    
1735    Arguments:
1736      value        any number
1737    
1738    Returns:       the byte swapped value
1739    */
1740    
1741    static pcre_uint32
1742    swap_uint32(pcre_uint32 value)
1743    {
1744    return ((value & 0x000000ff) << 24) |
1745           ((value & 0x0000ff00) <<  8) |
1746           ((value & 0x00ff0000) >>  8) |
1747           (value >> 24);
1748    }
1749    
1750    static pcre_uint16
1751    swap_uint16(pcre_uint16 value)
1752    {
1753    return (value >> 8) | (value << 8);
1754    }
1755    
1756    
1757    
1758    /*************************************************
1759    *        Flip bytes in a compiled pattern        *
1760  *************************************************/  *************************************************/
1761    
1762  /* Get one piece of information from the pcre_fullinfo() function */  /* This function is called if the 'F' option was present on a pattern that is
1763    to be written to a file. We flip the bytes of all the integer fields in the
1764    regex data block and the study block. In 16-bit mode this also flips relevant
1765    bytes in the pattern itself. This is to make it possible to test PCRE's
1766    ability to reload byte-flipped patterns, e.g. those compiled on a different
1767    architecture. */
1768    
1769  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void
1770    regexflip(pcre *ere, pcre_extra *extra)
1771  {  {
1772  int rc;  real_pcre *re = (real_pcre *)ere;
1773  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  #ifdef SUPPORT_PCRE16
1774    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  int op;
1775  }  pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1776    int length = re->name_count * re->name_entry_size;
1777    #ifdef SUPPORT_UTF
1778    BOOL utf = (re->options & PCRE_UTF16) != 0;
1779    BOOL utf16_char = FALSE;
1780    #endif /* SUPPORT_UTF */
1781    #endif /* SUPPORT_PCRE16 */
1782    
1783    /* Always flip the bytes in the main data block and study blocks. */
1784    
1785    re->magic_number = REVERSED_MAGIC_NUMBER;
1786    re->size = swap_uint32(re->size);
1787    re->options = swap_uint32(re->options);
1788    re->flags = swap_uint16(re->flags);
1789    re->top_bracket = swap_uint16(re->top_bracket);
1790    re->top_backref = swap_uint16(re->top_backref);
1791    re->first_char = swap_uint16(re->first_char);
1792    re->req_char = swap_uint16(re->req_char);
1793    re->name_table_offset = swap_uint16(re->name_table_offset);
1794    re->name_entry_size = swap_uint16(re->name_entry_size);
1795    re->name_count = swap_uint16(re->name_count);
1796    
1797    if (extra != NULL)
1798      {
1799      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1800      rsd->size = swap_uint32(rsd->size);
1801      rsd->flags = swap_uint32(rsd->flags);
1802      rsd->minlength = swap_uint32(rsd->minlength);
1803      }
1804    
1805    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1806    in the name table, if present, and then in the pattern itself. */
1807    
1808  /*************************************************  #ifdef SUPPORT_PCRE16
1809  *         Byte flipping function                 *  if (!use_pcre16) return;
 *************************************************/  
1810    
1811  static unsigned long int  while(TRUE)
1812  byteflip(unsigned long int value, int n)    {
1813  {    /* Swap previous characters. */
1814  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);    while (length-- > 0)
1815  return ((value & 0x000000ff) << 24) |      {
1816         ((value & 0x0000ff00) <<  8) |      *ptr = swap_uint16(*ptr);
1817         ((value & 0x00ff0000) >>  8) |      ptr++;
1818         ((value & 0xff000000) >> 24);      }
1819  }  #ifdef SUPPORT_UTF
1820      if (utf16_char)
1821        {
1822        if ((ptr[-1] & 0xfc00) == 0xd800)
1823          {
1824          /* We know that there is only one extra character in UTF-16. */
1825          *ptr = swap_uint16(*ptr);
1826          ptr++;
1827          }
1828        }
1829      utf16_char = FALSE;
1830    #endif /* SUPPORT_UTF */
1831    
1832      /* Get next opcode. */
1833    
1834      length = 0;
1835      op = *ptr;
1836      *ptr++ = swap_uint16(op);
1837    
1838      switch (op)
1839        {
1840        case OP_END:
1841        return;
1842    
1843    #ifdef SUPPORT_UTF
1844        case OP_CHAR:
1845        case OP_CHARI:
1846        case OP_NOT:
1847        case OP_NOTI:
1848        case OP_STAR:
1849        case OP_MINSTAR:
1850        case OP_PLUS:
1851        case OP_MINPLUS:
1852        case OP_QUERY:
1853        case OP_MINQUERY:
1854        case OP_UPTO:
1855        case OP_MINUPTO:
1856        case OP_EXACT:
1857        case OP_POSSTAR:
1858        case OP_POSPLUS:
1859        case OP_POSQUERY:
1860        case OP_POSUPTO:
1861        case OP_STARI:
1862        case OP_MINSTARI:
1863        case OP_PLUSI:
1864        case OP_MINPLUSI:
1865        case OP_QUERYI:
1866        case OP_MINQUERYI:
1867        case OP_UPTOI:
1868        case OP_MINUPTOI:
1869        case OP_EXACTI:
1870        case OP_POSSTARI:
1871        case OP_POSPLUSI:
1872        case OP_POSQUERYI:
1873        case OP_POSUPTOI:
1874        case OP_NOTSTAR:
1875        case OP_NOTMINSTAR:
1876        case OP_NOTPLUS:
1877        case OP_NOTMINPLUS:
1878        case OP_NOTQUERY:
1879        case OP_NOTMINQUERY:
1880        case OP_NOTUPTO:
1881        case OP_NOTMINUPTO:
1882        case OP_NOTEXACT:
1883        case OP_NOTPOSSTAR:
1884        case OP_NOTPOSPLUS:
1885        case OP_NOTPOSQUERY:
1886        case OP_NOTPOSUPTO:
1887        case OP_NOTSTARI:
1888        case OP_NOTMINSTARI:
1889        case OP_NOTPLUSI:
1890        case OP_NOTMINPLUSI:
1891        case OP_NOTQUERYI:
1892        case OP_NOTMINQUERYI:
1893        case OP_NOTUPTOI:
1894        case OP_NOTMINUPTOI:
1895        case OP_NOTEXACTI:
1896        case OP_NOTPOSSTARI:
1897        case OP_NOTPOSPLUSI:
1898        case OP_NOTPOSQUERYI:
1899        case OP_NOTPOSUPTOI:
1900        if (utf) utf16_char = TRUE;
1901    #endif
1902        /* Fall through. */
1903    
1904        default:
1905        length = OP_lengths16[op] - 1;
1906        break;
1907    
1908        case OP_CLASS:
1909        case OP_NCLASS:
1910        /* Skip the character bit map. */
1911        ptr += 32/sizeof(pcre_uint16);
1912        length = 0;
1913        break;
1914    
1915        case OP_XCLASS:
1916        /* Reverse the size of the XCLASS instance. */
1917        ptr++;
1918        *ptr = swap_uint16(*ptr);
1919        if (LINK_SIZE > 1)
1920          {
1921          /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1922          ptr++;
1923          *ptr = swap_uint16(*ptr);
1924          }
1925        ptr++;
1926    
1927        if (LINK_SIZE > 1)
1928          length = ((ptr[-LINK_SIZE] << 16) | ptr[-LINK_SIZE + 1]) -
1929            (1 + LINK_SIZE + 1);
1930        else
1931          length = ptr[-LINK_SIZE] - (1 + LINK_SIZE + 1);
1932    
1933        op = *ptr;
1934        *ptr = swap_uint16(op);
1935        if ((op & XCL_MAP) != 0)
1936          {
1937          /* Skip the character bit map. */
1938          ptr += 32/sizeof(pcre_uint16);
1939          length -= 32/sizeof(pcre_uint16);
1940          }
1941        break;
1942        }
1943      }
1944    /* Control should never reach here in 16 bit mode. */
1945    #endif /* SUPPORT_PCRE16 */
1946    }
1947    
1948    
1949    
# Line 1072  return ((value & 0x000000ff) << 24) | Line 1952  return ((value & 0x000000ff) << 24) |
1952  *************************************************/  *************************************************/
1953    
1954  static int  static int
1955  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
1956    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
1957    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
1958  {  {
# Line 1087  for (;;) Line 1967  for (;;)
1967    {    {
1968    *limit = mid;    *limit = mid;
1969    
1970    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1971      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
1972    
1973    if (count == errnumber)    if (count == errnumber)
# Line 1132  Returns: < 0, = 0, or > 0, according Line 2012  Returns: < 0, = 0, or > 0, according
2012  */  */
2013    
2014  static int  static int
2015  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2016  {  {
2017  while (n--)  while (n--)
2018    {    {
# Line 1159  Returns: appropriate PCRE_NEWLINE_x Line 2039  Returns: appropriate PCRE_NEWLINE_x
2039  */  */
2040    
2041  static int  static int
2042  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2043  {  {
2044  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2045  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2046  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2047  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2048  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2049  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2050  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2051  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2052  return 0;  return 0;
2053  }  }
# Line 1189  printf("If input is a terminal, readline Line 2069  printf("If input is a terminal, readline
2069  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2070  #endif  #endif
2071  printf("\nOptions:\n");  printf("\nOptions:\n");
2072    #ifdef SUPPORT_PCRE16
2073    printf("  -16      use 16-bit interface\n");
2074    #endif
2075  printf("  -b       show compiled code (bytecode)\n");  printf("  -b       show compiled code (bytecode)\n");
2076  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2077    printf("  -C arg   show a specific compile-time option\n");
2078    printf("           and exit with its value. The arg can be:\n");
2079    printf("     linksize     internal link size [2, 3, 4]\n");
2080    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2081    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2082    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2083    printf("     ucp          Unicode Properties supported [0, 1]\n");
2084    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2085  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2086  #if !defined NODFA  #if !defined NODFA
2087  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1226  options, followed by a set of test data, Line 2117  options, followed by a set of test data,
2117  int main(int argc, char **argv)  int main(int argc, char **argv)
2118  {  {
2119  FILE *infile = stdin;  FILE *infile = stdin;
2120    const char *version;
2121  int options = 0;  int options = 0;
2122  int study_options = 0;  int study_options = 0;
2123  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1251  int stack_size; Line 2143  int stack_size;
2143    
2144  pcre_jit_stack *jit_stack = NULL;  pcre_jit_stack *jit_stack = NULL;
2145    
2146    /* These vectors store, end-to-end, a list of zero-terminated captured
2147  /* These vectors store, end-to-end, a list of captured substring names. Assume  substring names, each list itself being terminated by an empty name. Assume
2148  that 1024 is plenty long enough for the few names we'll be testing. */  that 1024 is plenty long enough for the few names we'll be testing. It is
2149    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2150  uschar copynames[1024];  for the actual memory, to ensure alignment. By defining these variables always
2151  uschar getnames[1024];  (whether or not 8-bit or 16-bit is supported), we avoid too much mess with
2152    #ifdefs in the code. */
2153  uschar *copynamesptr;  
2154  uschar *getnamesptr;  pcre_uint16 copynames[1024];
2155    pcre_uint16 getnames[1024];
2156  /* Get buffers from malloc() so that Electric Fence will check their misuse  
2157  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint16 *cn16ptr;
2158    pcre_uint16 *gn16ptr;
2159  buffer = (unsigned char *)malloc(buffer_size);  
2160  dbuffer = (unsigned char *)malloc(buffer_size);  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2161  pbuffer = (unsigned char *)malloc(buffer_size);  pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2162    pcre_uint8 *cn8ptr;
2163    pcre_uint8 *gn8ptr;
2164    
2165    /* Get buffers from malloc() so that valgrind will check their misuse when
2166    debugging. They grow automatically when very long lines are read. The 16-bit
2167    buffer (buffer16) is obtained only if needed. */
2168    
2169    buffer = (pcre_uint8 *)malloc(buffer_size);
2170    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2171    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2172    
2173  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2174    
# Line 1281  it set 0x8000, but then I was advised th Line 2183  it set 0x8000, but then I was advised th
2183  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2184  #endif  #endif
2185    
2186    /* Get the version number: both pcre_version() and pcre16_version() give the
2187    same answer. We just need to ensure that we call one that is available. */
2188    
2189    #ifdef SUPPORT_PCRE8
2190    version = pcre_version();
2191    #else
2192    version = pcre16_version();
2193    #endif
2194    
2195  /* Scan options */  /* Scan options */
2196    
2197  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2198    {    {
2199    unsigned char *endptr;    pcre_uint8 *endptr;
2200    
2201    if (strcmp(argv[op], "-m") == 0) showstore = 1;    if (strcmp(argv[op], "-m") == 0) showstore = 1;
2202    else if (strcmp(argv[op], "-s") == 0) force_study = 0;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
# Line 1294  while (argc > 1 && argv[op][0] == '-') Line 2205  while (argc > 1 && argv[op][0] == '-')
2205      force_study = 1;      force_study = 1;
2206      force_study_options = PCRE_STUDY_JIT_COMPILE;      force_study_options = PCRE_STUDY_JIT_COMPILE;
2207      }      }
2208      else if (strcmp(argv[op], "-16") == 0)
2209        {
2210    #ifdef SUPPORT_PCRE16
2211        use_pcre16 = 1;
2212    #else
2213        printf("** This version of PCRE was built without 16-bit support\n");
2214        exit(1);
2215    #endif
2216        }
2217    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
2218    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
2219    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
# Line 1303  while (argc > 1 && argv[op][0] == '-') Line 2223  while (argc > 1 && argv[op][0] == '-')
2223    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
2224  #endif  #endif
2225    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
2226        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2227          *endptr == 0))          *endptr == 0))
2228      {      {
2229      op++;      op++;
# Line 1313  while (argc > 1 && argv[op][0] == '-') Line 2233  while (argc > 1 && argv[op][0] == '-')
2233      {      {
2234      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
2235      int temp;      int temp;
2236      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2237                       *endptr == 0))                       *endptr == 0))
2238        {        {
2239        timeitm = temp;        timeitm = temp;
# Line 1324  while (argc > 1 && argv[op][0] == '-') Line 2244  while (argc > 1 && argv[op][0] == '-')
2244      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2245      }      }
2246    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
2247        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2248          *endptr == 0))          *endptr == 0))
2249      {      {
2250  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
# Line 1352  while (argc > 1 && argv[op][0] == '-') Line 2272  while (argc > 1 && argv[op][0] == '-')
2272      {      {
2273      int rc;      int rc;
2274      unsigned long int lrc;      unsigned long int lrc;
2275      printf("PCRE version %s\n", pcre_version());  
2276        if (argc > 2)
2277          {
2278          if (strcmp(argv[op + 1], "linksize") == 0)
2279            {
2280            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2281            printf("%d\n", rc);
2282            yield = rc;
2283            goto EXIT;
2284            }
2285          if (strcmp(argv[op + 1], "pcre8") == 0)
2286            {
2287    #ifdef SUPPORT_PCRE8
2288            printf("1\n");
2289            yield = 1;
2290    #else
2291            printf("0\n");
2292            yield = 0;
2293    #endif
2294            goto EXIT;
2295            }
2296          if (strcmp(argv[op + 1], "pcre16") == 0)
2297            {
2298    #ifdef SUPPORT_PCRE16
2299            printf("1\n");
2300            yield = 1;
2301    #else
2302            printf("0\n");
2303            yield = 0;
2304    #endif
2305            goto EXIT;
2306            }
2307          if (strcmp(argv[op + 1], "utf") == 0)
2308            {
2309    #ifdef SUPPORT_PCRE8
2310            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2311            printf("%d\n", rc);
2312            yield = rc;
2313    #else
2314            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2315            printf("%d\n", rc);
2316            yield = rc;
2317    #endif
2318            goto EXIT;
2319            }
2320          if (strcmp(argv[op + 1], "ucp") == 0)
2321            {
2322            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2323            printf("%d\n", rc);
2324            yield = rc;
2325            goto EXIT;
2326            }
2327          if (strcmp(argv[op + 1], "jit") == 0)
2328            {
2329            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2330            printf("%d\n", rc);
2331            yield = rc;
2332            goto EXIT;
2333            }
2334          if (strcmp(argv[op + 1], "newline") == 0)
2335            {
2336            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2337            /* Note that these values are always the ASCII values, even
2338            in EBCDIC environments. CR is 13 and NL is 10. */
2339            printf("%s\n", (rc == 13)? "CR" :
2340              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2341              (rc == -2)? "ANYCRLF" :
2342              (rc == -1)? "ANY" : "???");
2343            goto EXIT;
2344            }
2345          printf("Unknown -C option: %s\n", argv[op + 1]);
2346          goto EXIT;
2347          }
2348    
2349        printf("PCRE version %s\n", version);
2350      printf("Compiled with\n");      printf("Compiled with\n");
2351    
2352    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2353    are set, either both UTFs are supported or both are not supported. */
2354    
2355    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2356        printf("  8-bit and 16-bit support\n");
2357        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2358        if (rc)
2359          printf("  UTF-8 and UTF-16 support\n");
2360        else
2361          printf("  No UTF-8 or UTF-16 support\n");
2362    #elif defined SUPPORT_PCRE8
2363        printf("  8-bit support only\n");
2364      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2365      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2366      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2367        printf("  16-bit support only\n");
2368        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2369        printf("  %sUTF-16 support\n", rc? "" : "No ");
2370    #endif
2371    
2372        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2373      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2374      (void)pcre_config(PCRE_CONFIG_JIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2375      if (rc)      if (rc)
2376        printf("  Just-in-time compiler support\n");        printf("  Just-in-time compiler support\n");
2377      else      else
2378        printf("  No just-in-time compiler support\n");        printf("  No just-in-time compiler support\n");
2379      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2380      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2381      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2382      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2383        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2384        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2385        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2386      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2387      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2388                                       "all Unicode newlines");                                       "all Unicode newlines");
2389      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2390      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2391      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2392      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2393      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2394      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2395      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2396      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2397      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2398      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
2399      goto EXIT;      goto EXIT;
2400      }      }
# Line 1440  if (argc > 2) Line 2453  if (argc > 2)
2453    
2454  /* Set alternative malloc function */  /* Set alternative malloc function */
2455    
2456    #ifdef SUPPORT_PCRE8
2457  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2458  pcre_free = new_free;  pcre_free = new_free;
2459  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2460  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2461    #endif
2462    
2463    #ifdef SUPPORT_PCRE16
2464    pcre16_malloc = new_malloc;
2465    pcre16_free = new_free;
2466    pcre16_stack_malloc = stack_malloc;
2467    pcre16_stack_free = stack_free;
2468    #endif
2469    
2470  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2471    
2472  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2473    
2474  /* Main loop */  /* Main loop */
2475    
# Line 1462  while (!done) Line 2484  while (!done)
2484  #endif  #endif
2485    
2486    const char *error;    const char *error;
2487    unsigned char *markptr;    pcre_uint8 *markptr;
2488    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2489    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2490    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2491    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2492    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2493    int do_allcaps = 0;    int do_allcaps = 0;
# Line 1481  while (!done) Line 2503  while (!done)
2503    int do_flip = 0;    int do_flip = 0;
2504    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2505    
2506    use_utf8 = 0;    use_utf = 0;
2507    debug_lengths = 1;    debug_lengths = 1;
2508    
2509    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1497  while (!done) Line 2519  while (!done)
2519    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2520      {      {
2521      unsigned long int magic, get_options;      unsigned long int magic, get_options;
2522      uschar sbuf[8];      pcre_uint8 sbuf[8];
2523      FILE *f;      FILE *f;
2524    
2525      p++;      p++;
# Line 1520  while (!done) Line 2542  while (!done)
2542        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2543    
2544      re = (real_pcre *)new_malloc(true_size);      re = (real_pcre *)new_malloc(true_size);
2545      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2546    
2547      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2548    
2549      magic = ((real_pcre *)re)->magic_number;      magic = ((real_pcre *)re)->magic_number;
2550      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2551        {        {
2552        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2553          {          {
2554          do_flip = 1;          do_flip = 1;
2555          }          }
# Line 1542  while (!done) Line 2564  while (!done)
2564      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2565        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
2566    
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
2567      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
2568    
2569      if (true_study_size != 0)      if (true_study_size != 0)
# Line 1563  while (!done) Line 2580  while (!done)
2580          {          {
2581          FAIL_READ:          FAIL_READ:
2582          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2583          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
2584              {
2585              PCRE_FREE_STUDY(extra);
2586              }
2587          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2588          fclose(f);          fclose(f);
2589          continue;          continue;
# Line 1573  while (!done) Line 2593  while (!done)
2593        }        }
2594      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2595    
2596        /* Flip the necessary bytes. */
2597        if (do_flip)
2598          {
2599          PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
2600          }
2601    
2602        /* Need to know if UTF-8 for printing data strings. */
2603    
2604        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2605        use_utf = (get_options & PCRE_UTF8) != 0;
2606    
2607      fclose(f);      fclose(f);
2608      goto SHOW_INFO;      goto SHOW_INFO;
2609      }      }
2610    
2611    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2612    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2613    
2614    delimiter = *p++;    delimiter = *p++;
2615    
# Line 1629  while (!done) Line 2660  while (!done)
2660    /* Look for options after final delimiter */    /* Look for options after final delimiter */
2661    
2662    options = 0;    options = 0;
2663      study_options = 0;
2664    log_store = showstore;  /* default from command line */    log_store = showstore;  /* default from command line */
2665    
2666    while (*pp != 0)    while (*pp != 0)
# Line 1686  while (!done) Line 2718  while (!done)
2718        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2719        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2720        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2721        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2722        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2723    
2724        case 'T':        case 'T':
# Line 1720  while (!done) Line 2752  while (!done)
2752          goto SKIP_DATA;          goto SKIP_DATA;
2753          }          }
2754        locale_set = 1;        locale_set = 1;
2755        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2756        pp = ppp;        pp = ppp;
2757        break;        break;
2758    
# Line 1733  while (!done) Line 2765  while (!done)
2765    
2766        case '<':        case '<':
2767          {          {
2768          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2769            {            {
2770            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2771            pp += 3;            pp += 3;
# Line 1761  while (!done) Line 2793  while (!done)
2793    
2794    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2795    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2796    local character tables. */    local character tables. Neither does it have 16-bit support. */
2797    
2798  #if !defined NOPOSIX  #if !defined NOPOSIX
2799    if (posix || do_posix)    if (posix || do_posix)
# Line 1777  while (!done) Line 2809  while (!done)
2809      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2810      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2811    
2812        first_gotten_store = 0;
2813      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2814    
2815      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1798  while (!done) Line 2831  while (!done)
2831      {      {
2832      unsigned long int get_options;      unsigned long int get_options;
2833    
2834        /* In 16-bit mode, convert the input. */
2835    
2836    #ifdef SUPPORT_PCRE16
2837        if (use_pcre16)
2838          {
2839          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2840            {
2841            case -1:
2842            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2843              "converted to UTF-16\n");
2844            goto SKIP_DATA;
2845    
2846            case -2:
2847            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2848              "cannot be converted to UTF-16\n");
2849            goto SKIP_DATA;
2850    
2851            case -3: /* "Impossible error" when to16 is called with arg1 FALSE */
2852            fprintf(outfile, "**Failed: character value greater than 0xffff "
2853              "cannot be converted to 16-bit in non-UTF mode\n");
2854            goto SKIP_DATA;
2855    
2856            default:
2857            break;
2858            }
2859          p = (pcre_uint8 *)buffer16;
2860          }
2861    #endif
2862    
2863        /* Compile many times when timing */
2864    
2865      if (timeit > 0)      if (timeit > 0)
2866        {        {
2867        register int i;        register int i;
# Line 1805  while (!done) Line 2869  while (!done)
2869        clock_t start_time = clock();        clock_t start_time = clock();
2870        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2871          {          {
2872          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2873          if (re != NULL) free(re);          if (re != NULL) free(re);
2874          }          }
2875        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1814  while (!done) Line 2878  while (!done)
2878            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2879        }        }
2880    
2881      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2882        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2883    
2884      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2885      if non-interactive. */      if non-interactive. */
# Line 1845  while (!done) Line 2910  while (!done)
2910      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
2911      lines. */      lines. */
2912    
2913      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
2914      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
2915        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2916    
2917        /* Extract the size for possible writing before possibly flipping it,
2918        and remember the store that was got. */
2919    
2920        true_size = ((real_pcre *)re)->size;
2921        regex_gotten_store = first_gotten_store;
2922    
2923      /* Print information if required. There are now two info-returning      /* Output code size information if requested */
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
2924    
2925      if (log_store)      if (log_store)
2926        fprintf(outfile, "Memory allocation (code space): %d\n",        fprintf(outfile, "Memory allocation (code space): %d\n",
2927          (int)(gotten_store -          (int)(first_gotten_store -
2928                sizeof(real_pcre) -                sizeof(real_pcre) -
2929                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
2930    
     /* Extract the size for possible writing before possibly flipping it,  
     and remember the store that was got. */  
   
     true_size = ((real_pcre *)re)->size;  
     regex_gotten_store = gotten_store;  
   
2931      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
2932      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
2933      suppresses the effect of /S (used for a few test patterns where studying is      suppresses the effect of /S (used for a few test patterns where studying is
# Line 1877  while (!done) Line 2941  while (!done)
2941          clock_t time_taken;          clock_t time_taken;
2942          clock_t start_time = clock();          clock_t start_time = clock();
2943          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2944            extra = pcre_study(re, study_options | force_study_options, &error);            {
2945              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2946              }
2947          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2948          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
2949              {
2950              PCRE_FREE_STUDY(extra);
2951              }
2952          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2953            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2954              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2955          }          }
2956        extra = pcre_study(re, study_options | force_study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2957        if (error != NULL)        if (error != NULL)
2958          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2959        else if (extra != NULL)        else if (extra != NULL)
2960            {
2961          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
2962            if (log_store)
2963              {
2964              size_t jitsize;
2965              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
2966                  jitsize != 0)
2967                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
2968              }
2969            }
2970        }        }
2971    
2972      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1904  while (!done) Line 2982  while (!done)
2982        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
2983        }        }
2984    
2985      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
2986    
2987      SHOW_INFO:      SHOW_INFO:
2988    
2989      if (do_debug)      if (do_debug)
2990        {        {
2991        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
2992        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
2993        }        }
2994    
2995      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1956  while (!done) Line 2997  while (!done)
2997      if (do_showinfo)      if (do_showinfo)
2998        {        {
2999        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3000        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3001          hascrorlf;          hascrorlf;
3002        int nameentrysize, namecount;        int nameentrysize, namecount;
3003        const uschar *nametable;        const pcre_uint8 *nametable;
3004    
3005        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3006        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3007        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3008        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3009        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3010        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3011        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3012        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3013        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3014        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3015        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3016              != 0)
3017  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3018    
3019        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3020          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 2009  while (!done) Line 3029  while (!done)
3029          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3030          while (namecount-- > 0)          while (namecount-- > 0)
3031            {            {
3032            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3033              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3034              GET2(nametable, 0));  #else
3035              int imm2_size = IMM2_SIZE;
3036    #endif
3037              int length = (int)STRLEN(nametable + imm2_size);
3038              fprintf(outfile, "  ");
3039              PCHARSV(nametable, imm2_size, length, outfile);
3040              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3041    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3042              fprintf(outfile, "%3d\n", use_pcre16?
3043                 (int)(((PCRE_SPTR16)nametable)[0])
3044                :((int)nametable[0] << 8) | (int)nametable[1]);
3045              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3046    #else
3047              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3048    #ifdef SUPPORT_PCRE8
3049            nametable += nameentrysize;            nametable += nameentrysize;
3050    #else
3051              nametable += nameentrysize * 2;
3052    #endif
3053    #endif
3054            }            }
3055          }          }
3056    
# Line 2020  while (!done) Line 3058  while (!done)
3058        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3059    
3060        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
3061        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3062    
3063        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3064          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 2036  while (!done) Line 3074  while (!done)
3074            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3075            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3076            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3077            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3078            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3079            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3080            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3081            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3082    
# Line 2080  while (!done) Line 3118  while (!done)
3118          }          }
3119        else        else
3120          {          {
3121          int ch = first_char & 255;          const char *caseless =
3122          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3123            "" : " (caseless)";            "" : " (caseless)";
3124          if (PRINTHEX(ch))  
3125            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3126              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3127          else          else
3128            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3129              fprintf(outfile, "First char = ");
3130              pchar(first_char, outfile);
3131              fprintf(outfile, "%s\n", caseless);
3132              }
3133          }          }
3134    
3135        if (need_char < 0)        if (need_char < 0)
# Line 2095  while (!done) Line 3138  while (!done)
3138          }          }
3139        else        else
3140          {          {
3141          int ch = need_char & 255;          const char *caseless =
3142          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3143            "" : " (caseless)";            "" : " (caseless)";
3144          if (PRINTHEX(ch))  
3145            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3146              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3147          else          else
3148            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3149              fprintf(outfile, "Need char = ");
3150              pchar(need_char, outfile);
3151              fprintf(outfile, "%s\n", caseless);
3152              }
3153          }          }
3154    
3155        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
# Line 2118  while (!done) Line 3166  while (!done)
3166            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3167          else          else
3168            {            {
3169            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3170            int minlength;            int minlength;
3171    
3172            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3173            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3174    
3175            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3176              {              {
3177              int i;              if (start_bits == NULL)
3178              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3179              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3180                {                {
3181                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3182                  int c = 24;
3183                  fprintf(outfile, "Starting byte set: ");
3184                  for (i = 0; i < 256; i++)
3185                  {                  {
3186                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
3187                    {                    {
3188                    fprintf(outfile, "%c ", i);                    if (c > 75)
3189                    c += 2;                      {
3190                    }                      fprintf(outfile, "\n  ");
3191                  else                      c = 2;
3192                    {                      }
3193                    fprintf(outfile, "\\x%02x ", i);                    if (PRINTOK(i) && i != ' ')
3194                    c += 5;                      {
3195                        fprintf(outfile, "%c ", i);
3196                        c += 2;
3197                        }
3198                      else
3199                        {
3200                        fprintf(outfile, "\\x%02x ", i);
3201                        c += 5;
3202                        }
3203                    }                    }
3204                  }                  }
3205                  fprintf(outfile, "\n");
3206                }                }
             fprintf(outfile, "\n");  
3207              }              }
3208            }            }
3209    
# Line 2162  while (!done) Line 3212  while (!done)
3212          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)          if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3213            {            {
3214            int jit;            int jit;
3215            new_info(re, extra, PCRE_INFO_JIT, &jit);            if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3216            if (jit)              {
3217              fprintf(outfile, "JIT study was successful\n");              if (jit)
3218            else                fprintf(outfile, "JIT study was successful\n");
3219                else
3220  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
3221              fprintf(outfile, "JIT study was not successful\n");                fprintf(outfile, "JIT study was not successful\n");
3222  #else  #else
3223              fprintf(outfile, "JIT support is not available in this version of PCRE\n");                fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3224  #endif  #endif
3225                }
3226            }            }
3227          }          }
3228        }        }
# Line 2188  while (!done) Line 3240  while (!done)
3240          }          }
3241        else        else
3242          {          {
3243          uschar sbuf[8];          pcre_uint8 sbuf[8];
3244          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3245          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3246          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3247          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3248            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3249          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3250          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3251          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3252          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3253            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3254    
3255          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3256              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2225  while (!done) Line 3278  while (!done)
3278          }          }
3279    
3280        new_free(re);        new_free(re);
3281        if (extra != NULL) pcre_free_study(extra);        if (extra != NULL)
3282            {
3283            PCRE_FREE_STUDY(extra);
3284            }
3285        if (locale_set)        if (locale_set)
3286          {          {
3287          new_free((void *)tables);          new_free((void *)tables);
# Line 2240  while (!done) Line 3296  while (!done)
3296    
3297    for (;;)    for (;;)
3298      {      {
3299      uschar *q;      pcre_uint8 *q;
3300      uschar *bptr;      pcre_uint8 *bptr;
3301      int *use_offsets = offsets;      int *use_offsets = offsets;
3302      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3303      int callout_data = 0;      int callout_data = 0;
# Line 2257  while (!done) Line 3313  while (!done)
3313      int g_notempty = 0;      int g_notempty = 0;
3314      int use_dfa = 0;      int use_dfa = 0;
3315    
     options = 0;  
   
3316      *copynames = 0;      *copynames = 0;
3317      *getnames = 0;      *getnames = 0;
3318    
3319      copynamesptr = copynames;      cn16ptr = copynames;
3320      getnamesptr = getnames;      gn16ptr = getnames;
3321        cn8ptr = copynames8;
3322        gn8ptr = getnames8;
3323    
3324      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3325      first_callout = 1;      first_callout = 1;
3326      last_callout_mark = NULL;      last_callout_mark = NULL;
3327      callout_extra = 0;      callout_extra = 0;
# Line 2273  while (!done) Line 3329  while (!done)
3329      callout_fail_count = 999999;      callout_fail_count = 999999;
3330      callout_fail_id = -1;      callout_fail_id = -1;
3331      show_malloc = 0;      show_malloc = 0;
3332        options = 0;
3333    
3334      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3335        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2307  while (!done) Line 3364  while (!done)
3364        {        {
3365        int i = 0;        int i = 0;
3366        int n = 0;        int n = 0;
3367    
3368        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3369          In non-UTF mode, allow the value of the byte to fall through to later,
3370          where values greater than 127 are turned into UTF-8 when running in
3371          16-bit mode. */
3372    
3373          if (c != '\\')
3374            {
3375            if (use_utf)
3376              {
3377              *q++ = c;
3378              continue;
3379              }
3380            }
3381    
3382          /* Handle backslash escapes */
3383    
3384          else switch ((c = *p++))
3385          {          {
3386          case 'a': c =    7; break;          case 'a': c =    7; break;
3387          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2324  while (!done) Line 3397  while (!done)
3397          c -= '0';          c -= '0';
3398          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3399            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3400          break;          break;
3401    
3402          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3403          if (*p == '{')          if (*p == '{')
3404            {            {
3405            unsigned char *pt = p;            pcre_uint8 *pt = p;
3406            c = 0;            c = 0;
3407    
3408            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3409            when isxdigit() is a macro that refers to its argument more than            when isxdigit() is a macro that refers to its argument more than
3410            once. This is banned by the C Standard, but apparently happens in at            once. This is banned by the C Standard, but apparently happens in at
3411            least one MacOS environment. */            least one MacOS environment. */
3412    
3413            for (pt++; isxdigit(*pt); pt++)            for (pt++; isxdigit(*pt); pt++)
3414              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3415            if (*pt == '}')            if (*pt == '}')
3416              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3417              p = pt + 1;              p = pt + 1;
3418              break;              break;
3419              }              }
3420            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3421            }            }
 #endif  
3422    
3423          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3424            allows UTF-8 characters to be constructed byte by byte, and also allows
3425            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3426            Otherwise, pass it down to later code so that it can be turned into
3427            UTF-8 when running in 16-bit mode. */
3428    
3429          c = 0;          c = 0;
3430          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
# Line 2386  while (!done) Line 3432  while (!done)
3432            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3433            p++;            p++;
3434            }            }
3435            if (use_utf)
3436              {
3437              *q++ = c;
3438              continue;
3439              }
3440          break;          break;
3441    
3442          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2418  while (!done) Line 3469  while (!done)
3469            }            }
3470          else if (isalnum(*p))          else if (isalnum(*p))
3471            {            {
3472            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3473            }            }
3474          else if (*p == '+')          else if (*p == '+')
3475            {            {
# Line 2434  while (!done) Line 3478  while (!done)
3478            }            }
3479          else if (*p == '-')          else if (*p == '-')
3480            {            {
3481            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3482            p++;            p++;
3483            }            }
3484          else if (*p == '!')          else if (*p == '!')
# Line 2488  while (!done) Line 3532  while (!done)
3532            }            }
3533          else if (isalnum(*p))          else if (isalnum(*p))
3534            {            {
3535            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)getnamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);  
           getnamesptr = npp;  
3536            }            }
3537          continue;          continue;
3538    
# Line 2505  while (!done) Line 3542  while (!done)
3542              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3543              && extra->executable_jit != NULL)              && extra->executable_jit != NULL)
3544            {            {
3545            if (jit_stack != NULL) pcre_jit_stack_free(jit_stack);            if (jit_stack != NULL) PCRE_JIT_STACK_FREE(jit_stack);
3546            jit_stack = pcre_jit_stack_alloc(1, n * 1024);            jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3547            pcre_assign_jit_stack(extra, jit_callback, jit_stack);            PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3548            }            }
3549          continue;          continue;
3550    
# Line 2603  while (!done) Line 3640  while (!done)
3640            }            }
3641          continue;          continue;
3642          }          }
3643        *q++ = c;  
3644          /* We now have a character value in c that may be greater than 255. In
3645          16-bit mode, we always convert characters to UTF-8 so that values greater
3646          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3647          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3648          mode must have come from \x{...} or octal constructs because values from
3649          \x.. get this far only in non-UTF mode. */
3650    
3651          if (use_pcre16 || use_utf)
3652            {
3653            pcre_uint8 buff8[8];
3654            int ii, utn;
3655            utn = ord2utf8(c, buff8);
3656            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3657            }
3658          else
3659            {
3660            if (c > 255)
3661              {
3662              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3663                "and UTF-8 mode is not enabled.\n", c);
3664              fprintf(outfile, "** Truncation will probably give the wrong "
3665                "result.\n");
3666              }
3667            *q++ = c;
3668            }
3669        }        }
3670    
3671        /* Reached end of subject string */
3672    
3673      *q = 0;      *q = 0;
3674      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3675    
# Line 2666  while (!done) Line 3731  while (!done)
3731            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3732              {              {
3733              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3734              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3735                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3736              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3737              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3738                {                {
3739                fprintf(outfile, "%2d+ ", (int)i);                fprintf(outfile, "%2d+ ", (int)i);
3740                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3741                  outfile);                  outfile);
3742                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3743                }                }
# Line 2680  while (!done) Line 3745  while (!done)
3745            }            }
3746          }          }
3747        free(pmatch);        free(pmatch);
3748          goto NEXT_DATA;
3749        }        }
3750    
3751    #endif  /* !defined NOPOSIX */
3752    
3753      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3754    
3755      else  #ifdef SUPPORT_PCRE16
3756  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3757          {
3758          len = to16(TRUE, bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3759          switch(len)
3760            {
3761            case -1:
3762            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3763              "converted to UTF-16\n");
3764            goto NEXT_DATA;
3765    
3766            case -2:
3767            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3768              "cannot be converted to UTF-16\n");
3769            goto NEXT_DATA;
3770    
3771            case -3:
3772            fprintf(outfile, "**Failed: character value greater than 0xffff "
3773              "cannot be converted to 16-bit in non-UTF mode\n");
3774            goto NEXT_DATA;
3775    
3776            default:
3777            break;
3778            }
3779          bptr = (pcre_uint8 *)buffer16;
3780          }
3781    #endif
3782    
3783      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3784        {        {
# Line 2702  while (!done) Line 3795  while (!done)
3795            {            {
3796            int workspace[1000];            int workspace[1000];
3797            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3798              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3799                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3800                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3801                  (sizeof(workspace)/sizeof(int)));
3802                }
3803            }            }
3804          else          else
3805  #endif  #endif
3806    
3807          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3808            count = pcre_exec(re, extra, (char *)bptr, len,            {
3809              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3810                (options | g_notempty), use_offsets, use_size_offsets);
3811              }
3812          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3813          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3814            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2757  while (!done) Line 3853  while (!done)
3853            }            }
3854          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3855          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3856          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3857            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3858          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3859          }          }
# Line 2769  while (!done) Line 3865  while (!done)
3865        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3866          {          {
3867          int workspace[1000];          int workspace[1000];
3868          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3869            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3870            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3871          if (count == 0)          if (count == 0)
3872            {            {
3873            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2782  while (!done) Line 3878  while (!done)
3878    
3879        else        else
3880          {          {
3881          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3882            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3883          if (count == 0)          if (count == 0)
3884            {            {
3885            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 2796  while (!done) Line 3892  while (!done)
3892        if (count >= 0)        if (count >= 0)
3893          {          {
3894          int i, maxcount;          int i, maxcount;
3895            void *cnptr, *gnptr;
3896    
3897  #if !defined NODFA  #if !defined NODFA
3898          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2822  while (!done) Line 3919  while (!done)
3919    
3920          if (do_allcaps)          if (do_allcaps)
3921            {            {
3922            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
3923                goto SKIP_DATA;
3924            count++;   /* Allow for full match */            count++;   /* Allow for full match */
3925            if (count * 2 > use_size_offsets) count = use_size_offsets/2;            if (count * 2 > use_size_offsets) count = use_size_offsets/2;
3926            }            }
# Line 2844  while (!done) Line 3942  while (!done)
3942            else            else
3943              {              {
3944              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3945              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
3946                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3947              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3948              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3949                {                {
3950                fprintf(outfile, "%2d+ ", i/2);                fprintf(outfile, "%2d+ ", i/2);
3951                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
3952                  outfile);                  outfile);
3953                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3954                }                }
3955              }              }
3956            }            }
3957    
3958          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
3959              {
3960              fprintf(outfile, "MK: ");
3961              PCHARSV(markptr, 0, -1, outfile);
3962              fprintf(outfile, "\n");
3963              }
3964    
3965          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
3966            {            {
3967            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
3968              {              {
3969                int rc;
3970              char copybuffer[256];              char copybuffer[256];
3971              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
3972                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
3973              if (rc < 0)              if (rc < 0)
3974                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
3975              else              else
3976                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
3977                  fprintf(outfile, "%2dC ", i);
3978                  PCHARSV(copybuffer, 0, rc, outfile);
3979                  fprintf(outfile, " (%d)\n", rc);
3980                  }
3981              }              }
3982            }            }
3983    
3984          for (copynamesptr = copynames;          cnptr = copynames;
3985               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
3986            {            {
3987              int rc;
3988            char copybuffer[256];            char copybuffer[256];
3989            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
3990              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
3991                {
3992                if (*(pcre_uint16 *)cnptr == 0) break;
3993                }
3994              else
3995                {
3996                if (*(pcre_uint8 *)cnptr == 0) break;
3997                }
3998    
3999              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4000                cnptr, copybuffer, sizeof(copybuffer));
4001    
4002            if (rc < 0)            if (rc < 0)
4003              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4004                fprintf(outfile, "copy substring ");
4005                PCHARSV(cnptr, 0, -1, outfile);
4006                fprintf(outfile, " failed %d\n", rc);
4007                }
4008            else            else
4009              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4010                fprintf(outfile, "  C ");
4011                PCHARSV(copybuffer, 0, rc, outfile);
4012                fprintf(outfile, " (%d) ", rc);
4013                PCHARSV(cnptr, 0, -1, outfile);
4014                putc('\n', outfile);
4015                }
4016    
4017              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4018            }            }
4019    
4020          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4021            {            {
4022            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4023              {              {
4024                int rc;
4025              const char *substring;              const char *substring;
4026              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4027              if (rc < 0)              if (rc < 0)
4028                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4029              else              else
4030                {                {
4031                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4032                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4033                  fprintf(outfile, " (%d)\n", rc);
4034                  PCRE_FREE_SUBSTRING(substring);
4035                }                }
4036              }              }
4037            }            }
4038    
4039          for (getnamesptr = getnames;          gnptr = getnames;
4040               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4041            {            {
4042              int rc;
4043            const char *substring;            const char *substring;
4044            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4045              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4046                {
4047                if (*(pcre_uint16 *)gnptr == 0) break;
4048                }
4049              else
4050                {
4051                if (*(pcre_uint8 *)gnptr == 0) break;
4052                }
4053    
4054              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4055                gnptr, &substring);
4056            if (rc < 0)            if (rc < 0)
4057              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4058                fprintf(outfile, "get substring ");
4059                PCHARSV(gnptr, 0, -1, outfile);
4060                fprintf(outfile, " failed %d\n", rc);
4061                }
4062            else            else
4063              {              {
4064              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4065              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4066                fprintf(outfile, " (%d) ", rc);
4067                PCHARSV(gnptr, 0, -1, outfile);
4068                PCRE_FREE_SUBSTRING(substring);
4069                putc('\n', outfile);
4070              }              }
4071    
4072              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4073            }            }
4074    
4075          if (getlist)          if (getlist)
4076            {            {
4077              int rc;
4078            const char **stringlist;            const char **stringlist;
4079            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4080            if (rc < 0)            if (rc < 0)
4081              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4082            else            else
4083              {              {
4084              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4085                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4086                  fprintf(outfile, "%2dL ", i);
4087                  PCHARSV(stringlist[i], 0, -1, outfile);
4088                  putc('\n', outfile);
4089                  }
4090              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4091                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4092              pcre_free_substring_list(stringlist);              PCRE_FREE_SUBSTRING_LIST(stringlist);
4093              }              }
4094            }            }
4095          }          }
# Line 2942  while (!done) Line 4099  while (!done)
4099        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4100          {          {
4101          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4102            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4103              {
4104              fprintf(outfile, "Partial match, mark=");
4105              PCHARSV(markptr, 0, -1, outfile);
4106              }
4107          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4108            {            {
4109            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4110            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4111              outfile);              outfile);
4112            }            }
4113          fprintf(outfile, "\n");          fprintf(outfile, "\n");
# Line 2963  while (!done) Line 4124  while (!done)
4124        terminated by CRLF, an advance of one character just passes the \r,        terminated by CRLF, an advance of one character just passes the \r,
4125        whereas we should prefer the longer newline sequence, as does the code in        whereas we should prefer the longer newline sequence, as does the code in
4126        pcre_exec(). Fudge the offset value to achieve this. We check for a        pcre_exec(). Fudge the offset value to achieve this. We check for a
4127        newline setting in the pattern; if none was set, use pcre_config() to        newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4128        find the default.        find the default.
4129    
4130        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
# Line 2979  while (!done) Line 4140  while (!done)
4140            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4141              {              {
4142              int d;              int d;
4143              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4144              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4145              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4146              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 2993  while (!done) Line 4154  while (!done)
4154                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4155                &&                &&
4156                start_offset < len - 1 &&                start_offset < len - 1 &&
4157                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4158                bptr[start_offset+1] == '\n')                (use_pcre16?
4159                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4160                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4161                  :
4162                       bptr[start_offset] == '\r'
4163                    && bptr[start_offset + 1] == '\n')
4164    #elif defined SUPPORT_PCRE16
4165                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4166                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4167    #else
4168                     bptr[start_offset] == '\r'
4169                  && bptr[start_offset + 1] == '\n'
4170    #endif
4171                  )
4172              onechar++;              onechar++;
4173            else if (use_utf8)            else if (use_utf)
4174              {              {
4175              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4176                {                {
# Line 3013  while (!done) Line 4187  while (!done)
4187              case PCRE_ERROR_NOMATCH:              case PCRE_ERROR_NOMATCH:
4188              if (gmatched == 0)              if (gmatched == 0)
4189                {                {
4190                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL)
4191                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  {
4192                    fprintf(outfile, "No match\n");
4193                    }
4194                  else
4195                    {
4196                    fprintf(outfile, "No match, mark = ");
4197                    PCHARSV(markptr, 0, -1, outfile);
4198                    putc('\n', outfile);
4199                    }
4200                }                }
4201              break;              break;
4202    
4203              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
4204              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
4205              fprintf(outfile, "Error %d (%s UTF-8 string)", count,              fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4206                (count == PCRE_ERROR_BADUTF8)? "bad" : "short");                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4207                  use_pcre16? "16" : "8");
4208              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
4209                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4210                  use_offsets[1]);                  use_offsets[1]);
4211              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4212              break;              break;
4213    
4214                case PCRE_ERROR_BADUTF8_OFFSET:
4215                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4216                  use_pcre16? "16" : "8");
4217                break;
4218    
4219              default:              default:
4220              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))
4221                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
# Line 3067  while (!done) Line 4255  while (!done)
4255    
4256        else        else
4257          {          {
4258          bptr += use_offsets[1];          bptr += use_offsets[1] * CHAR_SIZE;
4259          len -= use_offsets[1];          len -= use_offsets[1];
4260          }          }
4261        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
# Line 3082  while (!done) Line 4270  while (!done)
4270  #endif  #endif
4271    
4272    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
4273    if (extra != NULL) pcre_free_study(extra);    if (extra != NULL)
4274        {
4275        PCRE_FREE_STUDY(extra);
4276        }
4277    if (locale_set)    if (locale_set)
4278      {      {
4279      new_free((void *)tables);      new_free((void *)tables);
# Line 3091  while (!done) Line 4282  while (!done)
4282      }      }
4283    if (jit_stack != NULL)    if (jit_stack != NULL)
4284      {      {
4285      pcre_jit_stack_free(jit_stack);      PCRE_JIT_STACK_FREE(jit_stack);
4286      jit_stack = NULL;      jit_stack = NULL;
4287      }      }
4288    }    }
# Line 3108  free(dbuffer); Line 4299  free(dbuffer);
4299  free(pbuffer);  free(pbuffer);
4300  free(offsets);  free(offsets);
4301    
4302    #ifdef SUPPORT_PCRE16
4303    if (buffer16 != NULL) free(buffer16);
4304    #endif
4305    
4306  return yield;  return yield;
4307  }  }
4308    

Legend:
Removed from v.735  
changed lines
  Added in v.838

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12