/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 645 by ph10, Sun Jul 31 17:02:18 2011 UTC revision 923 by ph10, Tue Feb 21 13:25:05 2012 UTC
# Line 1  Line 1 
1  /*************************************************  /*.************************************************
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
51  #include "config.h"  #include "config.h"
# Line 94  input mode under Windows. */ Line 105  input mode under Windows. */
105  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
106  #endif  #endif
107    
108    #define PRIV(name) name
109    
110  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
111  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
# Line 105  here before pcre_internal.h so that the Line 117  here before pcre_internal.h so that the
117  appropriately for an application, not for building PCRE. */  appropriately for an application, not for building PCRE. */
118    
119  #include "pcre.h"  #include "pcre.h"
120    
121    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122    /* Configure internal macros to 16 bit mode. */
123    #define COMPILE_PCRE16
124    #endif
125    
126  #include "pcre_internal.h"  #include "pcre_internal.h"
127    
128    /* The pcre_printint() function, which prints the internal form of a compiled
129    regex, is held in a separate file so that (a) it can be compiled in either
130    8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131    when that is compiled in debug mode. */
132    
133    #ifdef SUPPORT_PCRE8
134    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135    #endif
136    #ifdef SUPPORT_PCRE16
137    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138    #endif
139    
140  /* We need access to some of the data tables that PCRE uses. So as not to have  /* We need access to some of the data tables that PCRE uses. So as not to have
141  to keep two copies, we include the source file here, changing the names of the  to keep two copies, we include the source file here, changing the names of the
142  external symbols to prevent clashes. */  external symbols to prevent clashes. */
143    
144  #define _pcre_ucp_gentype      ucp_gentype  #define PCRE_INCLUDED
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_utt_names        utt_names  
 #define _pcre_OP_lengths       OP_lengths  
145    
146  #include "pcre_tables.c"  #include "pcre_tables.c"
147    
 /* We also need the pcre_printint() function for printing out compiled  
 patterns. This function is in a separate file so that it can be included in  
 pcre_compile.c when that module is compiled with debugging enabled. It needs to  
 know which case is being compiled. */  
   
 #define COMPILING_PCRETEST  
 #include "pcre_printint.src"  
   
148  /* The definition of the macro PRINTABLE, which determines whether to print an  /* The definition of the macro PRINTABLE, which determines whether to print an
149  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
150  contained in the printint.src file. We use it here also, in cases when the  the same as in the printint.src file. We use it here in cases when the locale
151  locale has not been explicitly changed, so as to get consistent output from  has not been explicitly changed, so as to get consistent output from systems
152  systems that differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
153    
154    #ifdef EBCDIC
155    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
156    #else
157    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
158    #endif
159    
160  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162    /* Posix support is disabled in 16 bit only mode. */
163    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164    #define NOPOSIX
165    #endif
166    
167  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
168  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 148  Makefile. */ Line 172  Makefile. */
172  #include "pcreposix.h"  #include "pcreposix.h"
173  #endif  #endif
174    
175  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
176  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177  interface to the DFA matcher (NODFA), and without the doublecheck of the old  NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
178  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  automatically cut out the UTF support if PCRE is built without it. */
179  UTF8 support if PCRE is built without it. */  
180    #ifndef SUPPORT_UTF
181  #ifndef SUPPORT_UTF8  #ifndef NOUTF
182  #ifndef NOUTF8  #define NOUTF
 #define NOUTF8  
183  #endif  #endif
184  #endif  #endif
185    
186    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188    only from one place and is handled differently). I couldn't dream up any way of
189    using a single macro to do this in a generic way, because of the many different
190    argument requirements. We know that at least one of SUPPORT_PCRE8 and
191    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192    use these in the definitions of generic macros.
193    
194    **** Special note about the PCHARSxxx macros: the address of the string to be
195    printed is always given as two arguments: a base address followed by an offset.
196    The base address is cast to the correct data size for 8 or 16 bit data; the
197    offset is in units of this size. If the string were given as base+offset in one
198    argument, the casting might be incorrectly applied. */
199    
200    #ifdef SUPPORT_PCRE8
201    
202    #define PCHARS8(lv, p, offset, len, f) \
203      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
204    
205    #define PCHARSV8(p, offset, len, f) \
206      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
207    
208    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
209      p = read_capture_name8(p, cn8, re)
210    
211    #define STRLEN8(p) ((int)strlen((char *)p))
212    
213    #define SET_PCRE_CALLOUT8(callout) \
214      pcre_callout = callout
215    
216    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
217       pcre_assign_jit_stack(extra, callback, userdata)
218    
219    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
220      re = pcre_compile((char *)pat, options, error, erroffset, tables)
221    
222    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
223        namesptr, cbuffer, size) \
224      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
225        (char *)namesptr, cbuffer, size)
226    
227    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
228      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
229    
230    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
231        offsets, size_offsets, workspace, size_workspace) \
232      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
233        offsets, size_offsets, workspace, size_workspace)
234    
235    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
236        offsets, size_offsets) \
237      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
238        offsets, size_offsets)
239    
240    #define PCRE_FREE_STUDY8(extra) \
241      pcre_free_study(extra)
242    
243    #define PCRE_FREE_SUBSTRING8(substring) \
244      pcre_free_substring(substring)
245    
246    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
247      pcre_free_substring_list(listptr)
248    
249    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
250        getnamesptr, subsptr) \
251      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
252        (char *)getnamesptr, subsptr)
253    
254    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
255      n = pcre_get_stringnumber(re, (char *)ptr)
256    
257    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
258      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
259    
260    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
261      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
262    
263    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
264      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
265    
266    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
267      pcre_printint(re, outfile, debug_lengths)
268    
269    #define PCRE_STUDY8(extra, re, options, error) \
270      extra = pcre_study(re, options, error)
271    
272    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
273      pcre_jit_stack_alloc(startsize, maxsize)
274    
275    #define PCRE_JIT_STACK_FREE8(stack) \
276      pcre_jit_stack_free(stack)
277    
278    #endif /* SUPPORT_PCRE8 */
279    
280    /* -----------------------------------------------------------*/
281    
282    #ifdef SUPPORT_PCRE16
283    
284    #define PCHARS16(lv, p, offset, len, f) \
285      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
286    
287    #define PCHARSV16(p, offset, len, f) \
288      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
289    
290    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
291      p = read_capture_name16(p, cn16, re)
292    
293    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
294    
295    #define SET_PCRE_CALLOUT16(callout) \
296      pcre16_callout = (int (*)(pcre16_callout_block *))callout
297    
298    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
299      pcre16_assign_jit_stack((pcre16_extra *)extra, \
300        (pcre16_jit_callback)callback, userdata)
301    
302    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
304        tables)
305    
306    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
307        namesptr, cbuffer, size) \
308      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
309        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
310    
311    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
312      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
313        (PCRE_UCHAR16 *)cbuffer, size/2)
314    
315    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
316        offsets, size_offsets, workspace, size_workspace) \
317      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
318        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
319        workspace, size_workspace)
320    
321    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
322        offsets, size_offsets) \
323      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
324        len, start_offset, options, offsets, size_offsets)
325    
326    #define PCRE_FREE_STUDY16(extra) \
327      pcre16_free_study((pcre16_extra *)extra)
328    
329    #define PCRE_FREE_SUBSTRING16(substring) \
330      pcre16_free_substring((PCRE_SPTR16)substring)
331    
332    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
333      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
334    
335    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
336        getnamesptr, subsptr) \
337      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
338        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
339    
340    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
341      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
342    
343    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
344      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
345        (PCRE_SPTR16 *)(void*)subsptr)
346    
347    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
348      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
349        (PCRE_SPTR16 **)(void*)listptr)
350    
351    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
353        tables)
354    
355    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
356      pcre16_printint(re, outfile, debug_lengths)
357    
358    #define PCRE_STUDY16(extra, re, options, error) \
359      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
360    
361    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
362      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
363    
364    #define PCRE_JIT_STACK_FREE16(stack) \
365      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
366    
367    #endif /* SUPPORT_PCRE16 */
368    
369    
370    /* ----- Both modes are supported; a runtime test is needed, except for
371    pcre_config(), and the JIT stack functions, when it doesn't matter which
372    version is called. ----- */
373    
374    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
375    
376    #define CHAR_SIZE (use_pcre16? 2:1)
377    
378    #define PCHARS(lv, p, offset, len, f) \
379      if (use_pcre16) \
380        PCHARS16(lv, p, offset, len, f); \
381      else \
382        PCHARS8(lv, p, offset, len, f)
383    
384    #define PCHARSV(p, offset, len, f) \
385      if (use_pcre16) \
386        PCHARSV16(p, offset, len, f); \
387      else \
388        PCHARSV8(p, offset, len, f)
389    
390    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
391      if (use_pcre16) \
392        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
393      else \
394        READ_CAPTURE_NAME8(p, cn8, cn16, re)
395    
396    #define SET_PCRE_CALLOUT(callout) \
397      if (use_pcre16) \
398        SET_PCRE_CALLOUT16(callout); \
399      else \
400        SET_PCRE_CALLOUT8(callout)
401    
402    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
403    
404    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
405      if (use_pcre16) \
406        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
407      else \
408        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
409    
410    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
411      if (use_pcre16) \
412        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
413      else \
414        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
415    
416    #define PCRE_CONFIG pcre_config
417    
418    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
419        namesptr, cbuffer, size) \
420      if (use_pcre16) \
421        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
422          namesptr, cbuffer, size); \
423      else \
424        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
425          namesptr, cbuffer, size)
426    
427    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
428      if (use_pcre16) \
429        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
430      else \
431        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
432    
433    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
434        offsets, size_offsets, workspace, size_workspace) \
435      if (use_pcre16) \
436        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
437          offsets, size_offsets, workspace, size_workspace); \
438      else \
439        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
440          offsets, size_offsets, workspace, size_workspace)
441    
442    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
443        offsets, size_offsets) \
444      if (use_pcre16) \
445        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
446          offsets, size_offsets); \
447      else \
448        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets)
450    
451    #define PCRE_FREE_STUDY(extra) \
452      if (use_pcre16) \
453        PCRE_FREE_STUDY16(extra); \
454      else \
455        PCRE_FREE_STUDY8(extra)
456    
457    #define PCRE_FREE_SUBSTRING(substring) \
458      if (use_pcre16) \
459        PCRE_FREE_SUBSTRING16(substring); \
460      else \
461        PCRE_FREE_SUBSTRING8(substring)
462    
463    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
464      if (use_pcre16) \
465        PCRE_FREE_SUBSTRING_LIST16(listptr); \
466      else \
467        PCRE_FREE_SUBSTRING_LIST8(listptr)
468    
469    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
470        getnamesptr, subsptr) \
471      if (use_pcre16) \
472        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
473          getnamesptr, subsptr); \
474      else \
475        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
476          getnamesptr, subsptr)
477    
478    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
479      if (use_pcre16) \
480        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
481      else \
482        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
483    
484    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
485      if (use_pcre16) \
486        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
487      else \
488        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
489    
490    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
491      if (use_pcre16) \
492        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
493      else \
494        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
495    
496    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
497      (use_pcre16 ? \
498         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
499        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
500    
501    #define PCRE_JIT_STACK_FREE(stack) \
502      if (use_pcre16) \
503        PCRE_JIT_STACK_FREE16(stack); \
504      else \
505        PCRE_JIT_STACK_FREE8(stack)
506    
507    #define PCRE_MAKETABLES \
508      (use_pcre16? pcre16_maketables() : pcre_maketables())
509    
510    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
511      if (use_pcre16) \
512        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
513      else \
514        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
515    
516    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
517      if (use_pcre16) \
518        PCRE_PRINTINT16(re, outfile, debug_lengths); \
519      else \
520        PCRE_PRINTINT8(re, outfile, debug_lengths)
521    
522    #define PCRE_STUDY(extra, re, options, error) \
523      if (use_pcre16) \
524        PCRE_STUDY16(extra, re, options, error); \
525      else \
526        PCRE_STUDY8(extra, re, options, error)
527    
528    /* ----- Only 8-bit mode is supported ----- */
529    
530    #elif defined SUPPORT_PCRE8
531    #define CHAR_SIZE                 1
532    #define PCHARS                    PCHARS8
533    #define PCHARSV                   PCHARSV8
534    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
535    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
536    #define STRLEN                    STRLEN8
537    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
538    #define PCRE_COMPILE              PCRE_COMPILE8
539    #define PCRE_CONFIG               pcre_config
540    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
542    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
543    #define PCRE_EXEC                 PCRE_EXEC8
544    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
545    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
546    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
547    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
548    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
549    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
550    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
551    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
552    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
553    #define PCRE_MAKETABLES           pcre_maketables()
554    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555    #define PCRE_PRINTINT             PCRE_PRINTINT8
556    #define PCRE_STUDY                PCRE_STUDY8
557    
558    /* ----- Only 16-bit mode is supported ----- */
559    
560    #else
561    #define CHAR_SIZE                 2
562    #define PCHARS                    PCHARS16
563    #define PCHARSV                   PCHARSV16
564    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
565    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
566    #define STRLEN                    STRLEN16
567    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
568    #define PCRE_COMPILE              PCRE_COMPILE16
569    #define PCRE_CONFIG               pcre16_config
570    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
572    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
573    #define PCRE_EXEC                 PCRE_EXEC16
574    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
575    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
576    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
577    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
578    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
579    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
580    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
581    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
582    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
583    #define PCRE_MAKETABLES           pcre16_maketables()
584    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585    #define PCRE_PRINTINT             PCRE_PRINTINT16
586    #define PCRE_STUDY                PCRE_STUDY16
587    #endif
588    
589    /* ----- End of mode-specific function call macros ----- */
590    
591    
592  /* Other parameters */  /* Other parameters */
593    
# Line 187  static int debug_lengths; Line 615  static int debug_lengths;
615  static int first_callout;  static int first_callout;
616  static int locale_set = 0;  static int locale_set = 0;
617  static int show_malloc;  static int show_malloc;
618  static int use_utf8;  static int use_utf;
619  static size_t gotten_store;  static size_t gotten_store;
620    static size_t first_gotten_store = 0;
621  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
622    
623  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
624    
625  static int buffer_size = 50000;  static int buffer_size = 50000;
626  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
627  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
628  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
629    
630    /* Another buffer is needed for translation to 16-bit character strings. It will
631    be obtained and extended as required. */
632    
633    #ifdef SUPPORT_PCRE16
634    static int buffer16_size = 0;
635    static pcre_uint16 *buffer16 = NULL;
636    
637    #ifdef SUPPORT_PCRE8
638    
639    /* We need the table of operator lengths that is used for 16-bit compiling, in
640    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
641    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
642    appropriately for the 16-bit world. Just as a safety check, make sure that
643    COMPILE_PCRE16 is *not* set. */
644    
645    #ifdef COMPILE_PCRE16
646    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
647    #endif
648    
649    #if LINK_SIZE == 2
650    #undef LINK_SIZE
651    #define LINK_SIZE 1
652    #elif LINK_SIZE == 3 || LINK_SIZE == 4
653    #undef LINK_SIZE
654    #define LINK_SIZE 2
655    #else
656    #error LINK_SIZE must be either 2, 3, or 4
657    #endif
658    
659    #undef IMM2_SIZE
660    #define IMM2_SIZE 1
661    
662    #endif /* SUPPORT_PCRE8 */
663    
664    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
665    #endif  /* SUPPORT_PCRE16 */
666    
667    /* If we have 8-bit support, default use_pcre16 to false; if there is also
668    16-bit support, it can be changed by an option. If there is no 8-bit support,
669    there must be 16-bit support, so default it to 1. */
670    
671    #ifdef SUPPORT_PCRE8
672    static int use_pcre16 = 0;
673    #else
674    static int use_pcre16 = 1;
675    #endif
676    
677    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
678    
679    static int jit_study_bits[] =
680      {
681      PCRE_STUDY_JIT_COMPILE,
682      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
683      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
684      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
685      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
686      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
687      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
688        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
689    };
690    
691  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
692    
# Line 208  static const char *errtexts[] = { Line 698  static const char *errtexts[] = {
698    "magic number missing",    "magic number missing",
699    "unknown opcode - pattern overwritten?",    "unknown opcode - pattern overwritten?",
700    "no more memory",    "no more memory",
701    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */    NULL,  /* never returned by pcre_exec() or pcre_dfa_exec() */
702    "match limit exceeded",    "match limit exceeded",
703    "callout error code",    "callout error code",
704    NULL,  /* BADUTF8 is handled specially */    NULL,  /* BADUTF8/16 is handled specially */
705    "bad UTF-8 offset",    NULL,  /* BADUTF8/16 offset is handled specially */
706    NULL,  /* PARTIAL is handled specially */    NULL,  /* PARTIAL is handled specially */
707    "not used - internal error",    "not used - internal error",
708    "internal error - pattern overwritten?",    "internal error - pattern overwritten?",
# Line 221  static const char *errtexts[] = { Line 711  static const char *errtexts[] = {
711    "backreference condition or recursion test not supported for DFA matching",    "backreference condition or recursion test not supported for DFA matching",
712    "match limit not supported for DFA matching",    "match limit not supported for DFA matching",
713    "workspace size exceeded in DFA matching",    "workspace size exceeded in DFA matching",
714    "too much recursion for DFA matching",    "too much recursion for DFA matching",
715    "recursion limit exceeded",    "recursion limit exceeded",
716    "not used - internal error",    "not used - internal error",
717    "invalid combination of newline options",    "invalid combination of newline options",
718    "bad offset value",    "bad offset value",
719    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8/16 is handled specially */
720    "nested recursion at the same subject position"    "nested recursion at the same subject position",
721      "JIT stack limit reached",
722      "pattern compiled in wrong mode: 8-bit/16-bit error"
723  };  };
724    
725    
726  /*************************************************  /*************************************************
727  *         Alternate character tables             *  *         Alternate character tables             *
# Line 243  the L (locale) option also adjusts the t Line 735  the L (locale) option also adjusts the t
735  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
736  only ASCII characters. */  only ASCII characters. */
737    
738  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
739    
740  /* This table is a lower casing table. */  /* This table is a lower casing table. */
741    
# Line 416  graph, print, punct, and cntrl. Other cl Line 908  graph, print, punct, and cntrl. Other cl
908  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
909  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
910    
911  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
912  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
913  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
914  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 579  return sys_errlist[n]; Line 1071  return sys_errlist[n];
1071  #endif /* HAVE_STRERROR */  #endif /* HAVE_STRERROR */
1072    
1073    
1074    /*************************************************
1075    *         JIT memory callback                    *
1076    *************************************************/
1077    
1078    static pcre_jit_stack* jit_callback(void *arg)
1079    {
1080    return (pcre_jit_stack *)arg;
1081    }
1082    
1083    
1084    #if !defined NOUTF || defined SUPPORT_PCRE16
1085    /*************************************************
1086    *            Convert UTF-8 string to value       *
1087    *************************************************/
1088    
1089    /* This function takes one or more bytes that represents a UTF-8 character,
1090    and returns the value of the character.
1091    
1092    Argument:
1093      utf8bytes   a pointer to the byte vector
1094      vptr        a pointer to an int to receive the value
1095    
1096    Returns:      >  0 => the number of bytes consumed
1097                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1098    */
1099    
1100    static int
1101    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1102    {
1103    int c = *utf8bytes++;
1104    int d = c;
1105    int i, j, s;
1106    
1107    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1108      {
1109      if ((d & 0x80) == 0) break;
1110      d <<= 1;
1111      }
1112    
1113    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1114    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1115    
1116    /* i now has a value in the range 1-5 */
1117    
1118    s = 6*i;
1119    d = (c & utf8_table3[i]) << s;
1120    
1121    for (j = 0; j < i; j++)
1122      {
1123      c = *utf8bytes++;
1124      if ((c & 0xc0) != 0x80) return -(j+1);
1125      s -= 6;
1126      d |= (c & 0x3f) << s;
1127      }
1128    
1129    /* Check that encoding was the correct unique one */
1130    
1131    for (j = 0; j < utf8_table1_size; j++)
1132      if (d <= utf8_table1[j]) break;
1133    if (j != i) return -(i+1);
1134    
1135    /* Valid value */
1136    
1137    *vptr = d;
1138    return i+1;
1139    }
1140    #endif /* NOUTF || SUPPORT_PCRE16 */
1141    
1142    
1143    
1144    #if !defined NOUTF || defined SUPPORT_PCRE16
1145    /*************************************************
1146    *       Convert character value to UTF-8         *
1147    *************************************************/
1148    
1149    /* This function takes an integer value in the range 0 - 0x7fffffff
1150    and encodes it as a UTF-8 character in 0 to 6 bytes.
1151    
1152    Arguments:
1153      cvalue     the character value
1154      utf8bytes  pointer to buffer for result - at least 6 bytes long
1155    
1156    Returns:     number of characters placed in the buffer
1157    */
1158    
1159    static int
1160    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1161    {
1162    register int i, j;
1163    for (i = 0; i < utf8_table1_size; i++)
1164      if (cvalue <= utf8_table1[i]) break;
1165    utf8bytes += i;
1166    for (j = i; j > 0; j--)
1167     {
1168     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1169     cvalue >>= 6;
1170     }
1171    *utf8bytes = utf8_table2[i] | cvalue;
1172    return i + 1;
1173    }
1174    #endif
1175    
1176    
1177    #ifdef SUPPORT_PCRE16
1178    /*************************************************
1179    *         Convert a string to 16-bit             *
1180    *************************************************/
1181    
1182    /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
1183    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
1184    double, because up to 0xffff uses no more than 3 bytes in UTF-8 but possibly 4
1185    in UTF-16. Higher values use 4 bytes in UTF-8 and up to 4 bytes in UTF-16. The
1186    result is always left in buffer16.
1187    
1188    Note that this function does not object to surrogate values. This is
1189    deliberate; it makes it possible to construct UTF-16 strings that are invalid,
1190    for the purpose of testing that they are correctly faulted.
1191    
1192    Patterns to be converted are either plain ASCII or UTF-8; data lines are always
1193    in UTF-8 so that values greater than 255 can be handled.
1194    
1195    Arguments:
1196      data       TRUE if converting a data line; FALSE for a regex
1197      p          points to a byte string
1198      utf        true if UTF-8 (to be converted to UTF-16)
1199      len        number of bytes in the string (excluding trailing zero)
1200    
1201    Returns:     number of 16-bit data items used (excluding trailing zero)
1202                 OR -1 if a UTF-8 string is malformed
1203                 OR -2 if a value > 0x10ffff is encountered
1204                 OR -3 if a value > 0xffff is encountered when not in UTF mode
1205    */
1206    
1207    static int
1208    to16(int data, pcre_uint8 *p, int utf, int len)
1209    {
1210    pcre_uint16 *pp;
1211    
1212    if (buffer16_size < 2*len + 2)
1213      {
1214      if (buffer16 != NULL) free(buffer16);
1215      buffer16_size = 2*len + 2;
1216      buffer16 = (pcre_uint16 *)malloc(buffer16_size);
1217      if (buffer16 == NULL)
1218        {
1219        fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
1220        exit(1);
1221        }
1222      }
1223    
1224    pp = buffer16;
1225    
1226    if (!utf && !data)
1227      {
1228      while (len-- > 0) *pp++ = *p++;
1229      }
1230    
1231    else
1232      {
1233      int c = 0;
1234      while (len > 0)
1235        {
1236        int chlen = utf82ord(p, &c);
1237        if (chlen <= 0) return -1;
1238        if (c > 0x10ffff) return -2;
1239        p += chlen;
1240        len -= chlen;
1241        if (c < 0x10000) *pp++ = c; else
1242          {
1243          if (!utf) return -3;
1244          c -= 0x10000;
1245          *pp++ = 0xD800 | (c >> 10);
1246          *pp++ = 0xDC00 | (c & 0x3ff);
1247          }
1248        }
1249      }
1250    
1251    *pp = 0;
1252    return pp - buffer16;
1253    }
1254    #endif
1255    
1256    
1257  /*************************************************  /*************************************************
# Line 604  Returns: pointer to the start of n Line 1277  Returns: pointer to the start of n
1277                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1278  */  */
1279    
1280  static uschar *  static pcre_uint8 *
1281  extend_inputline(FILE *f, uschar *start, const char *prompt)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1282  {  {
1283  uschar *here = start;  pcre_uint8 *here = start;
1284    
1285  for (;;)  for (;;)
1286    {    {
1287    int rlen = (int)(buffer_size - (here - buffer));    size_t rlen = (size_t)(buffer_size - (here - buffer));
1288    
1289    if (rlen > 1000)    if (rlen > 1000)
1290      {      {
# Line 654  for (;;) Line 1327  for (;;)
1327    else    else
1328      {      {
1329      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1330      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1331      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1332      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1333    
1334      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1335        {        {
# Line 687  return NULL; /* Control never gets here Line 1360  return NULL; /* Control never gets here
1360    
1361    
1362    
   
   
   
   
1363  /*************************************************  /*************************************************
1364  *          Read number from string               *  *          Read number from string               *
1365  *************************************************/  *************************************************/
# Line 707  Returns: the unsigned long Line 1376  Returns: the unsigned long
1376  */  */
1377    
1378  static int  static int
1379  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1380  {  {
1381  int result = 0;  int result = 0;
1382  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 718  return(result); Line 1387  return(result);
1387    
1388    
1389    
   
1390  /*************************************************  /*************************************************
1391  *            Convert UTF-8 string to value       *  *             Print one character                *
1392  *************************************************/  *************************************************/
1393    
1394  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
1395    
1396  Returns:      >  0 => the number of bytes consumed  static int pchar(int c, FILE *f)
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1397  {  {
1398  int c = *utf8bytes++;  if (PRINTOK(c))
1399  int d = c;    {
1400  int i, j, s;    if (f != NULL) fprintf(f, "%c", c);
1401      return 1;
1402      }
1403    
1404  for (i = -1; i < 6; i++)               /* i is number of additional bytes */  if (c < 0x100)
1405    {    {
1406    if ((d & 0x80) == 0) break;    if (use_utf)
1407    d <<= 1;      {
1408        if (f != NULL) fprintf(f, "\\x{%02x}", c);
1409        return 6;
1410        }
1411      else
1412        {
1413        if (f != NULL) fprintf(f, "\\x%02x", c);
1414        return 4;
1415        }
1416    }    }
1417    
1418  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1419  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  return (c <= 0x000000ff)? 6 :
1420           (c <= 0x00000fff)? 7 :
1421           (c <= 0x0000ffff)? 8 :
1422           (c <= 0x000fffff)? 9 : 10;
1423    }
1424    
 /* i now has a value in the range 1-5 */  
1425    
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
1426    
1427  for (j = 0; j < i; j++)  #ifdef SUPPORT_PCRE8
1428    {  /*************************************************
1429    c = *utf8bytes++;  *         Print 8-bit character string           *
1430    if ((c & 0xc0) != 0x80) return -(j+1);  *************************************************/
   s -= 6;  
   d |= (c & 0x3f) << s;  
   }  
1431    
1432  /* Check that encoding was the correct unique one */  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1433    If handed a NULL file, just counts chars without printing. */
1434    
1435  for (j = 0; j < utf8_table1_size; j++)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1436    if (d <= utf8_table1[j]) break;  {
1437  if (j != i) return -(i+1);  int c = 0;
1438    int yield = 0;
1439    
1440  /* Valid value */  if (length < 0)
1441      length = strlen((char *)p);
1442    
1443  *vptr = d;  while (length-- > 0)
1444  return i+1;    {
1445  }  #if !defined NOUTF
1446      if (use_utf)
1447        {
1448        int rc = utf82ord(p, &c);
1449        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1450          {
1451          length -= rc - 1;
1452          p += rc;
1453          yield += pchar(c, f);
1454          continue;
1455          }
1456        }
1457    #endif
1458      c = *p++;
1459      yield += pchar(c, f);
1460      }
1461    
1462    return yield;
1463    }
1464  #endif  #endif
1465    
1466    
1467    
1468    #ifdef SUPPORT_PCRE16
1469  /*************************************************  /*************************************************
1470  *       Convert character value to UTF-8         *  *    Find length of 0-terminated 16-bit string   *
1471  *************************************************/  *************************************************/
1472    
1473  /* This function takes an integer value in the range 0 - 0x7fffffff  static int strlen16(PCRE_SPTR16 p)
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 ord2utf8(int cvalue, uschar *utf8bytes)  
1474  {  {
1475  register int i, j;  int len = 0;
1476  for (i = 0; i < utf8_table1_size; i++)  while (*p++ != 0) len++;
1477    if (cvalue <= utf8_table1[i]) break;  return len;
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1478  }  }
1479    #endif  /* SUPPORT_PCRE16 */
 #endif  
   
1480    
1481    
1482    #ifdef SUPPORT_PCRE16
1483  /*************************************************  /*************************************************
1484  *             Print character string             *  *           Print 16-bit character string        *
1485  *************************************************/  *************************************************/
1486    
1487  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1488  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1489    
1490  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1491  {  {
 int c = 0;  
1492  int yield = 0;  int yield = 0;
1493    
1494    if (length < 0)
1495      length = strlen16(p);
1496    
1497  while (length-- > 0)  while (length-- > 0)
1498    {    {
1499  #if !defined NOUTF8    int c = *p++ & 0xffff;
1500    if (use_utf8)  #if !defined NOUTF
1501      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1502      {      {
1503      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1504        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1505        {        {
1506        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1507        p += rc;        length--;
1508        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1509        }        }
1510      }      }
1511  #endif  #endif
1512      yield += pchar(c, f);
1513      }
1514    
1515     /* Not UTF-8, or malformed UTF-8  */  return yield;
1516    }
1517    #endif  /* SUPPORT_PCRE16 */
1518    
1519    c = *p++;  
1520    if (PRINTHEX(c))  
1521      {  #ifdef SUPPORT_PCRE8
1522      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1523      yield++;  *     Read a capture name (8-bit) and check it   *
1524      }  *************************************************/
1525    else  
1526      {  static pcre_uint8 *
1527      if (f != NULL) fprintf(f, "\\x%02x", c);  read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1528      yield += 4;  {
1529      }  pcre_uint8 *npp = *pp;
1530    while (isalnum(*p)) *npp++ = *p++;
1531    *npp++ = 0;
1532    *npp = 0;
1533    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1534      {
1535      fprintf(outfile, "no parentheses with name \"");
1536      PCHARSV(*pp, 0, -1, outfile);
1537      fprintf(outfile, "\"\n");
1538    }    }
1539    
1540  return yield;  *pp = npp;
1541    return p;
1542    }
1543    #endif  /* SUPPORT_PCRE8 */
1544    
1545    
1546    
1547    #ifdef SUPPORT_PCRE16
1548    /*************************************************
1549    *     Read a capture name (16-bit) and check it  *
1550    *************************************************/
1551    
1552    /* Note that the text being read is 8-bit. */
1553    
1554    static pcre_uint8 *
1555    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1556    {
1557    pcre_uint16 *npp = *pp;
1558    while (isalnum(*p)) *npp++ = *p++;
1559    *npp++ = 0;
1560    *npp = 0;
1561    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1562      {
1563      fprintf(outfile, "no parentheses with name \"");
1564      PCHARSV(*pp, 0, -1, outfile);
1565      fprintf(outfile, "\"\n");
1566      }
1567    *pp = npp;
1568    return p;
1569  }  }
1570    #endif  /* SUPPORT_PCRE16 */
1571    
1572    
1573    
# Line 905  if (callout_extra) Line 1596  if (callout_extra)
1596      else      else
1597        {        {
1598        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1599        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1600          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1601        fprintf(f, "\n");        fprintf(f, "\n");
1602        }        }
# Line 918  printed lengths of the substrings. */ Line 1609  printed lengths of the substrings. */
1609    
1610  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1611    
1612  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1613  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1614    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1615    
1616  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1617    
1618  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1619    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1620    
1621  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 961  fprintf(outfile, "%.*s", (cb->next_item_ Line 1652  fprintf(outfile, "%.*s", (cb->next_item_
1652  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1653  first_callout = 0;  first_callout = 0;
1654    
1655  if (cb->mark != last_callout_mark)  if (cb->mark != last_callout_mark)
1656    {    {
1657    fprintf(outfile, "Latest Mark: %s\n",    if (cb->mark == NULL)
1658      (cb->mark == NULL)? "<unset>" : (char *)(cb->mark));      fprintf(outfile, "Latest Mark: <unset>\n");
1659    last_callout_mark = cb->mark;    else
1660    }      {
1661        fprintf(outfile, "Latest Mark: ");
1662        PCHARSV(cb->mark, 0, -1, outfile);
1663        putc('\n', outfile);
1664        }
1665      last_callout_mark = cb->mark;
1666      }
1667    
1668  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1669    {    {
# Line 987  return (cb->callout_number != callout_fa Line 1684  return (cb->callout_number != callout_fa
1684  *            Local malloc functions              *  *            Local malloc functions              *
1685  *************************************************/  *************************************************/
1686    
1687  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1688  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1689    show_malloc variable is set only during matching. */
1690    
1691  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1692  {  {
1693  void *block = malloc(size);  void *block = malloc(size);
1694  gotten_store = size;  gotten_store = size;
1695    if (first_gotten_store == 0) first_gotten_store = size;
1696  if (show_malloc)  if (show_malloc)
1697    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1698  return block;  return block;
# Line 1006  if (show_malloc) Line 1705  if (show_malloc)
1705  free(block);  free(block);
1706  }  }
1707    
   
1708  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1709    
1710  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 1029  free(block); Line 1727  free(block);
1727  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1728  *************************************************/  *************************************************/
1729    
1730  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1731    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1732    value, but the code is defensive.
1733    
1734    Arguments:
1735      re        compiled regex
1736      study     study data
1737      option    PCRE_INFO_xxx option
1738      ptr       where to put the data
1739    
1740    Returns:    0 when OK, < 0 on error
1741    */
1742    
1743  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1744    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1745  {  {
1746  int rc;  int rc;
1747  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1748    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1749    #ifdef SUPPORT_PCRE16
1750      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1751    #else
1752      rc = PCRE_ERROR_BADMODE;
1753    #endif
1754    else
1755    #ifdef SUPPORT_PCRE8
1756      rc = pcre_fullinfo(re, study, option, ptr);
1757    #else
1758      rc = PCRE_ERROR_BADMODE;
1759    #endif
1760    
1761    if (rc < 0)
1762      {
1763      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1764        use_pcre16? "16" : "", option);
1765      if (rc == PCRE_ERROR_BADMODE)
1766        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1767          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1768      }
1769    
1770    return rc;
1771  }  }
1772    
1773    
1774    
1775  /*************************************************  /*************************************************
1776  *         Byte flipping function                 *  *             Swap byte functions                *
1777  *************************************************/  *************************************************/
1778    
1779  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1780  byteflip(unsigned long int value, int n)  value, respectively.
1781    
1782    Arguments:
1783      value        any number
1784    
1785    Returns:       the byte swapped value
1786    */
1787    
1788    static pcre_uint32
1789    swap_uint32(pcre_uint32 value)
1790  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1791  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1792         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1793         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1794         ((value & 0xff000000) >> 24);         (value >> 24);
1795  }  }
1796    
1797    static pcre_uint16
1798    swap_uint16(pcre_uint16 value)
1799    {
1800    return (value >> 8) | (value << 8);
1801    }
1802    
1803    
1804    
1805    /*************************************************
1806    *        Flip bytes in a compiled pattern        *
1807    *************************************************/
1808    
1809    /* This function is called if the 'F' option was present on a pattern that is
1810    to be written to a file. We flip the bytes of all the integer fields in the
1811    regex data block and the study block. In 16-bit mode this also flips relevant
1812    bytes in the pattern itself. This is to make it possible to test PCRE's
1813    ability to reload byte-flipped patterns, e.g. those compiled on a different
1814    architecture. */
1815    
1816    static void
1817    regexflip(pcre *ere, pcre_extra *extra)
1818    {
1819    REAL_PCRE *re = (REAL_PCRE *)ere;
1820    #ifdef SUPPORT_PCRE16
1821    int op;
1822    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1823    int length = re->name_count * re->name_entry_size;
1824    #ifdef SUPPORT_UTF
1825    BOOL utf = (re->options & PCRE_UTF16) != 0;
1826    BOOL utf16_char = FALSE;
1827    #endif /* SUPPORT_UTF */
1828    #endif /* SUPPORT_PCRE16 */
1829    
1830    /* Always flip the bytes in the main data block and study blocks. */
1831    
1832    re->magic_number = REVERSED_MAGIC_NUMBER;
1833    re->size = swap_uint32(re->size);
1834    re->options = swap_uint32(re->options);
1835    re->flags = swap_uint16(re->flags);
1836    re->top_bracket = swap_uint16(re->top_bracket);
1837    re->top_backref = swap_uint16(re->top_backref);
1838    re->first_char = swap_uint16(re->first_char);
1839    re->req_char = swap_uint16(re->req_char);
1840    re->name_table_offset = swap_uint16(re->name_table_offset);
1841    re->name_entry_size = swap_uint16(re->name_entry_size);
1842    re->name_count = swap_uint16(re->name_count);
1843    
1844    if (extra != NULL)
1845      {
1846      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1847      rsd->size = swap_uint32(rsd->size);
1848      rsd->flags = swap_uint32(rsd->flags);
1849      rsd->minlength = swap_uint32(rsd->minlength);
1850      }
1851    
1852    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1853    in the name table, if present, and then in the pattern itself. */
1854    
1855    #ifdef SUPPORT_PCRE16
1856    if (!use_pcre16) return;
1857    
1858    while(TRUE)
1859      {
1860      /* Swap previous characters. */
1861      while (length-- > 0)
1862        {
1863        *ptr = swap_uint16(*ptr);
1864        ptr++;
1865        }
1866    #ifdef SUPPORT_UTF
1867      if (utf16_char)
1868        {
1869        if ((ptr[-1] & 0xfc00) == 0xd800)
1870          {
1871          /* We know that there is only one extra character in UTF-16. */
1872          *ptr = swap_uint16(*ptr);
1873          ptr++;
1874          }
1875        }
1876      utf16_char = FALSE;
1877    #endif /* SUPPORT_UTF */
1878    
1879      /* Get next opcode. */
1880    
1881      length = 0;
1882      op = *ptr;
1883      *ptr++ = swap_uint16(op);
1884    
1885      switch (op)
1886        {
1887        case OP_END:
1888        return;
1889    
1890    #ifdef SUPPORT_UTF
1891        case OP_CHAR:
1892        case OP_CHARI:
1893        case OP_NOT:
1894        case OP_NOTI:
1895        case OP_STAR:
1896        case OP_MINSTAR:
1897        case OP_PLUS:
1898        case OP_MINPLUS:
1899        case OP_QUERY:
1900        case OP_MINQUERY:
1901        case OP_UPTO:
1902        case OP_MINUPTO:
1903        case OP_EXACT:
1904        case OP_POSSTAR:
1905        case OP_POSPLUS:
1906        case OP_POSQUERY:
1907        case OP_POSUPTO:
1908        case OP_STARI:
1909        case OP_MINSTARI:
1910        case OP_PLUSI:
1911        case OP_MINPLUSI:
1912        case OP_QUERYI:
1913        case OP_MINQUERYI:
1914        case OP_UPTOI:
1915        case OP_MINUPTOI:
1916        case OP_EXACTI:
1917        case OP_POSSTARI:
1918        case OP_POSPLUSI:
1919        case OP_POSQUERYI:
1920        case OP_POSUPTOI:
1921        case OP_NOTSTAR:
1922        case OP_NOTMINSTAR:
1923        case OP_NOTPLUS:
1924        case OP_NOTMINPLUS:
1925        case OP_NOTQUERY:
1926        case OP_NOTMINQUERY:
1927        case OP_NOTUPTO:
1928        case OP_NOTMINUPTO:
1929        case OP_NOTEXACT:
1930        case OP_NOTPOSSTAR:
1931        case OP_NOTPOSPLUS:
1932        case OP_NOTPOSQUERY:
1933        case OP_NOTPOSUPTO:
1934        case OP_NOTSTARI:
1935        case OP_NOTMINSTARI:
1936        case OP_NOTPLUSI:
1937        case OP_NOTMINPLUSI:
1938        case OP_NOTQUERYI:
1939        case OP_NOTMINQUERYI:
1940        case OP_NOTUPTOI:
1941        case OP_NOTMINUPTOI:
1942        case OP_NOTEXACTI:
1943        case OP_NOTPOSSTARI:
1944        case OP_NOTPOSPLUSI:
1945        case OP_NOTPOSQUERYI:
1946        case OP_NOTPOSUPTOI:
1947        if (utf) utf16_char = TRUE;
1948    #endif
1949        /* Fall through. */
1950    
1951        default:
1952        length = OP_lengths16[op] - 1;
1953        break;
1954    
1955        case OP_CLASS:
1956        case OP_NCLASS:
1957        /* Skip the character bit map. */
1958        ptr += 32/sizeof(pcre_uint16);
1959        length = 0;
1960        break;
1961    
1962        case OP_XCLASS:
1963        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1964        if (LINK_SIZE > 1)
1965          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1966            - (1 + LINK_SIZE + 1));
1967        else
1968          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1969    
1970        /* Reverse the size of the XCLASS instance. */
1971        *ptr = swap_uint16(*ptr);
1972        ptr++;
1973        if (LINK_SIZE > 1)
1974          {
1975          *ptr = swap_uint16(*ptr);
1976          ptr++;
1977          }
1978    
1979        op = *ptr;
1980        *ptr = swap_uint16(op);
1981        ptr++;
1982        if ((op & XCL_MAP) != 0)
1983          {
1984          /* Skip the character bit map. */
1985          ptr += 32/sizeof(pcre_uint16);
1986          length -= 32/sizeof(pcre_uint16);
1987          }
1988        break;
1989        }
1990      }
1991    /* Control should never reach here in 16 bit mode. */
1992    #endif /* SUPPORT_PCRE16 */
1993    }
1994    
1995    
1996    
# Line 1062  return ((value & 0x000000ff) << 24) | Line 1999  return ((value & 0x000000ff) << 24) |
1999  *************************************************/  *************************************************/
2000    
2001  static int  static int
2002  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2003    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2004    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2005  {  {
# Line 1077  for (;;) Line 2014  for (;;)
2014    {    {
2015    *limit = mid;    *limit = mid;
2016    
2017    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2018      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2019    
2020    if (count == errnumber)    if (count == errnumber)
# Line 1122  Returns: < 0, = 0, or > 0, according Line 2059  Returns: < 0, = 0, or > 0, according
2059  */  */
2060    
2061  static int  static int
2062  strncmpic(uschar *s, uschar *t, int n)  strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2063  {  {
2064  while (n--)  while (n--)
2065    {    {
# Line 1149  Returns: appropriate PCRE_NEWLINE_x Line 2086  Returns: appropriate PCRE_NEWLINE_x
2086  */  */
2087    
2088  static int  static int
2089  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2090  {  {
2091  if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2092  if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2093  if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2094  if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2095  if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2096  if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;  if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2097  if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;  if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2098  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2099  return 0;  return 0;
2100  }  }
# Line 1179  printf("If input is a terminal, readline Line 2116  printf("If input is a terminal, readline
2116  printf("This version of pcretest is not linked with readline().\n");  printf("This version of pcretest is not linked with readline().\n");
2117  #endif  #endif
2118  printf("\nOptions:\n");  printf("\nOptions:\n");
2119  printf("  -b       show compiled code (bytecode)\n");  #ifdef SUPPORT_PCRE16
2120    printf("  -16      use the 16-bit library\n");
2121    #endif
2122    printf("  -b       show compiled code\n");
2123  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2124    printf("  -C arg   show a specific compile-time option\n");
2125    printf("           and exit with its value. The arg can be:\n");
2126    printf("     linksize     internal link size [2, 3, 4]\n");
2127    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2128    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2129    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2130    printf("     ucp          Unicode Properties supported [0, 1]\n");
2131    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2132    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2133  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2134  #if !defined NODFA  #if !defined NODFA
2135  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
# Line 1195  printf(" -p use POSIX interface\n Line 2144  printf(" -p use POSIX interface\n
2144  #endif  #endif
2145  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2146  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2147  printf("  -s       force each pattern to be studied\n"  printf("  -s       force each pattern to be studied at basic level\n"
2148           "  -s+      force each pattern to be studied, using JIT if available\n"
2149           "  -s++     ditto, verifying when JIT was actually used\n"
2150           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2151           "             where 1 <= n <= 7 selects JIT options\n"
2152           "  -s++n    ditto, verifying when JIT was actually used\n"
2153         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2154  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2155  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 1215  options, followed by a set of test data, Line 2169  options, followed by a set of test data,
2169  int main(int argc, char **argv)  int main(int argc, char **argv)
2170  {  {
2171  FILE *infile = stdin;  FILE *infile = stdin;
2172    const char *version;
2173  int options = 0;  int options = 0;
2174  int study_options = 0;  int study_options = 0;
2175  int default_find_match_limit = FALSE;  int default_find_match_limit = FALSE;
# Line 1223  int timeit = 0; Line 2178  int timeit = 0;
2178  int timeitm = 0;  int timeitm = 0;
2179  int showinfo = 0;  int showinfo = 0;
2180  int showstore = 0;  int showstore = 0;
2181  int force_study = 0;  int force_study = -1;
2182    int force_study_options = 0;
2183  int quiet = 0;  int quiet = 0;
2184  int size_offsets = 45;  int size_offsets = 45;
2185  int size_offsets_max;  int size_offsets_max;
# Line 1234  int posix = 0; Line 2190  int posix = 0;
2190  int debug = 0;  int debug = 0;
2191  int done = 0;  int done = 0;
2192  int all_use_dfa = 0;  int all_use_dfa = 0;
2193    int verify_jit = 0;
2194  int yield = 0;  int yield = 0;
2195  int stack_size;  int stack_size;
2196    
2197  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
   
 uschar copynames[1024];  
 uschar getnames[1024];  
2198    
2199  uschar *copynamesptr;  /* These vectors store, end-to-end, a list of zero-terminated captured
2200  uschar *getnamesptr;  substring names, each list itself being terminated by an empty name. Assume
2201    that 1024 is plenty long enough for the few names we'll be testing. It is
2202    easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2203    for the actual memory, to ensure alignment. */
2204    
2205    pcre_uint16 copynames[1024];
2206    pcre_uint16 getnames[1024];
2207    
2208    #ifdef SUPPORT_PCRE16
2209    pcre_uint16 *cn16ptr;
2210    pcre_uint16 *gn16ptr;
2211    #endif
2212    
2213  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2214  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2215    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2216    pcre_uint8 *cn8ptr;
2217    pcre_uint8 *gn8ptr;
2218    #endif
2219    
2220  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2221  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2222  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2223    
2224    buffer = (pcre_uint8 *)malloc(buffer_size);
2225    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2226    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2227    
2228  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2229    
# Line 1266  it set 0x8000, but then I was advised th Line 2238  it set 0x8000, but then I was advised th
2238  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2239  #endif  #endif
2240    
2241    /* Get the version number: both pcre_version() and pcre16_version() give the
2242    same answer. We just need to ensure that we call one that is available. */
2243    
2244    #ifdef SUPPORT_PCRE8
2245    version = pcre_version();
2246    #else
2247    version = pcre16_version();
2248    #endif
2249    
2250  /* Scan options */  /* Scan options */
2251    
2252  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2253    {    {
2254    unsigned char *endptr;    pcre_uint8 *endptr;
2255      char *arg = argv[op];
2256    
2257    if (strcmp(argv[op], "-m") == 0) showstore = 1;    if (strcmp(arg, "-m") == 0) showstore = 1;
2258    else if (strcmp(argv[op], "-s") == 0) force_study = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2259    else if (strcmp(argv[op], "-q") == 0) quiet = 1;  
2260    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strncmp(arg, "-s+", 3) == 0)
2261    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      {
2262    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      arg += 3;
2263    else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;      if (*arg == '+') { arg++; verify_jit = TRUE; }
2264        force_study = 1;
2265        if (*arg == 0)
2266          force_study_options = jit_study_bits[6];
2267        else if (*arg >= '1' && *arg <= '7')
2268          force_study_options = jit_study_bits[*arg - '1'];
2269        else goto BAD_ARG;
2270        }
2271      else if (strcmp(arg, "-16") == 0)
2272        {
2273    #ifdef SUPPORT_PCRE16
2274        use_pcre16 = 1;
2275    #else
2276        printf("** This version of PCRE was built without 16-bit support\n");
2277        exit(1);
2278    #endif
2279        }
2280      else if (strcmp(arg, "-q") == 0) quiet = 1;
2281      else if (strcmp(arg, "-b") == 0) debug = 1;
2282      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2283      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2284      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2285  #if !defined NODFA  #if !defined NODFA
2286    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2287  #endif  #endif
2288    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2289        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2290          *endptr == 0))          *endptr == 0))
2291      {      {
2292      op++;      op++;
2293      argc--;      argc--;
2294      }      }
2295    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2296      {      {
2297      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2298      int temp;      int temp;
2299      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2300                       *endptr == 0))                       *endptr == 0))
2301        {        {
2302        timeitm = temp;        timeitm = temp;
# Line 1303  while (argc > 1 && argv[op][0] == '-') Line 2306  while (argc > 1 && argv[op][0] == '-')
2306      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2307      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2308      }      }
2309    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2310        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2311          *endptr == 0))          *endptr == 0))
2312      {      {
2313  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
# Line 1321  while (argc > 1 && argv[op][0] == '-') Line 2324  while (argc > 1 && argv[op][0] == '-')
2324      printf("PCRE: setrlimit() failed with error %d\n", rc);      printf("PCRE: setrlimit() failed with error %d\n", rc);
2325      exit(1);      exit(1);
2326        }        }
2327      op++;      op++;
2328      argc--;      argc--;
2329  #endif  #endif
2330      }      }
2331  #if !defined NOPOSIX  #if !defined NOPOSIX
2332    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2333  #endif  #endif
2334    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2335      {      {
2336      int rc;      int rc;
2337      unsigned long int lrc;      unsigned long int lrc;
2338      printf("PCRE version %s\n", pcre_version());  
2339        if (argc > 2)
2340          {
2341          if (strcmp(argv[op + 1], "linksize") == 0)
2342            {
2343            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2344            printf("%d\n", rc);
2345            yield = rc;
2346            goto EXIT;
2347            }
2348          if (strcmp(argv[op + 1], "pcre8") == 0)
2349            {
2350    #ifdef SUPPORT_PCRE8
2351            printf("1\n");
2352            yield = 1;
2353    #else
2354            printf("0\n");
2355            yield = 0;
2356    #endif
2357            goto EXIT;
2358            }
2359          if (strcmp(argv[op + 1], "pcre16") == 0)
2360            {
2361    #ifdef SUPPORT_PCRE16
2362            printf("1\n");
2363            yield = 1;
2364    #else
2365            printf("0\n");
2366            yield = 0;
2367    #endif
2368            goto EXIT;
2369            }
2370          if (strcmp(argv[op + 1], "utf") == 0)
2371            {
2372    #ifdef SUPPORT_PCRE8
2373            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2374            printf("%d\n", rc);
2375            yield = rc;
2376    #else
2377            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2378            printf("%d\n", rc);
2379            yield = rc;
2380    #endif
2381            goto EXIT;
2382            }
2383          if (strcmp(argv[op + 1], "ucp") == 0)
2384            {
2385            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2386            printf("%d\n", rc);
2387            yield = rc;
2388            goto EXIT;
2389            }
2390          if (strcmp(argv[op + 1], "jit") == 0)
2391            {
2392            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2393            printf("%d\n", rc);
2394            yield = rc;
2395            goto EXIT;
2396            }
2397          if (strcmp(argv[op + 1], "newline") == 0)
2398            {
2399            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2400            /* Note that these values are always the ASCII values, even
2401            in EBCDIC environments. CR is 13 and NL is 10. */
2402            printf("%s\n", (rc == 13)? "CR" :
2403              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2404              (rc == -2)? "ANYCRLF" :
2405              (rc == -1)? "ANY" : "???");
2406            goto EXIT;
2407            }
2408          printf("Unknown -C option: %s\n", argv[op + 1]);
2409          goto EXIT;
2410          }
2411    
2412        printf("PCRE version %s\n", version);
2413      printf("Compiled with\n");      printf("Compiled with\n");
2414    
2415    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2416    are set, either both UTFs are supported or both are not supported. */
2417    
2418    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2419        printf("  8-bit and 16-bit support\n");
2420        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2421        if (rc)
2422          printf("  UTF-8 and UTF-16 support\n");
2423        else
2424          printf("  No UTF-8 or UTF-16 support\n");
2425    #elif defined SUPPORT_PCRE8
2426        printf("  8-bit support only\n");
2427      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2428      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2429      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2430        printf("  16-bit support only\n");
2431        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2432        printf("  %sUTF-16 support\n", rc? "" : "No ");
2433    #endif
2434    
2435        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2436      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2437      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2438        if (rc)
2439          {
2440          const char *arch;
2441          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2442          printf("  Just-in-time compiler support: %s\n", arch);
2443          }
2444        else
2445          printf("  No just-in-time compiler support\n");
2446        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2447      /* Note that these values are always the ASCII values, even      /* Note that these values are always the ASCII values, even
2448      in EBCDIC environments. CR is 13 and NL is 10. */      in EBCDIC environments. CR is 13 and NL is 10. */
2449      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :      printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2450        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :        (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2451        (rc == -2)? "ANYCRLF" :        (rc == -2)? "ANYCRLF" :
2452        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2453      (void)pcre_config(PCRE_CONFIG_BSR, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2454      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :      printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2455                                       "all Unicode newlines");                                       "all Unicode newlines");
2456      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2457      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2458      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2459      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2460      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2461      printf("  Default match limit = %ld\n", lrc);      printf("  Default match limit = %ld\n", lrc);
2462      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2463      printf("  Default recursion depth limit = %ld\n", lrc);      printf("  Default recursion depth limit = %ld\n", lrc);
2464      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2465      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2466        if (showstore)
2467          {
2468          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2469          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2470          }
2471        printf("\n");
2472      goto EXIT;      goto EXIT;
2473      }      }
2474    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2475             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2476      {      {
2477      usage();      usage();
2478      goto EXIT;      goto EXIT;
2479      }      }
2480    else    else
2481      {      {
2482      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2483        printf("** Unknown or malformed option %s\n", arg);
2484      usage();      usage();
2485      yield = 1;      yield = 1;
2486      goto EXIT;      goto EXIT;
# Line 1415  if (argc > 2) Line 2527  if (argc > 2)
2527    
2528  /* Set alternative malloc function */  /* Set alternative malloc function */
2529    
2530    #ifdef SUPPORT_PCRE8
2531  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2532  pcre_free = new_free;  pcre_free = new_free;
2533  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2534  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2535    #endif
2536    
2537    #ifdef SUPPORT_PCRE16
2538    pcre16_malloc = new_malloc;
2539    pcre16_free = new_free;
2540    pcre16_stack_malloc = stack_malloc;
2541    pcre16_stack_free = stack_free;
2542    #endif
2543    
2544  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2545    
2546  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2547    
2548  /* Main loop */  /* Main loop */
2549    
# Line 1437  while (!done) Line 2558  while (!done)
2558  #endif  #endif
2559    
2560    const char *error;    const char *error;
2561    unsigned char *markptr;    pcre_uint8 *markptr;
2562    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
2563    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
2564    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
2565      unsigned long int get_options;
2566    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2567    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2568    int do_allcaps = 0;    int do_allcaps = 0;
2569    int do_mark = 0;    int do_mark = 0;
2570    int do_study = 0;    int do_study = 0;
2571    int no_force_study = 0;    int no_force_study = 0;
2572    int do_debug = debug;    int do_debug = debug;
2573    int do_G = 0;    int do_G = 0;
2574    int do_g = 0;    int do_g = 0;
# Line 1456  while (!done) Line 2578  while (!done)
2578    int do_flip = 0;    int do_flip = 0;
2579    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2580    
2581    use_utf8 = 0;    use_utf = 0;
2582    debug_lengths = 1;    debug_lengths = 1;
2583    
2584    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1471  while (!done) Line 2593  while (!done)
2593    
2594    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2595      {      {
2596      unsigned long int magic, get_options;      pcre_uint32 magic;
2597      uschar sbuf[8];      pcre_uint8 sbuf[8];
2598      FILE *f;      FILE *f;
2599    
2600      p++;      p++;
2601        if (*p == '!')
2602          {
2603          do_debug = TRUE;
2604          do_showinfo = TRUE;
2605          p++;
2606          }
2607    
2608      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2609      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2610      *pp = 0;      *pp = 0;
# Line 1487  while (!done) Line 2616  while (!done)
2616        continue;        continue;
2617        }        }
2618    
2619        first_gotten_store = 0;
2620      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2621    
2622      true_size =      true_size =
# Line 1494  while (!done) Line 2624  while (!done)
2624      true_study_size =      true_study_size =
2625        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2626    
2627      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2628      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2629    
2630      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2631    
2632      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2633      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2634        {        {
2635        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2636          {          {
2637          do_flip = 1;          do_flip = 1;
2638          }          }
# Line 1514  while (!done) Line 2644  while (!done)
2644          }          }
2645        }        }
2646    
2647        /* We hide the byte-invert info for little and big endian tests. */
2648      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2649        do_flip? " (byte-inverted)" : "", p);        do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
   
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
2650    
2651      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
2652    
# Line 1538  while (!done) Line 2664  while (!done)
2664          {          {
2665          FAIL_READ:          FAIL_READ:
2666          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2667          if (extra != NULL) new_free(extra);          if (extra != NULL)
2668              {
2669              PCRE_FREE_STUDY(extra);
2670              }
2671          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2672          fclose(f);          fclose(f);
2673          continue;          continue;
# Line 1548  while (!done) Line 2677  while (!done)
2677        }        }
2678      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2679    
2680        /* Flip the necessary bytes. */
2681        if (do_flip)
2682          {
2683          int rc;
2684          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2685          if (rc == PCRE_ERROR_BADMODE)
2686            {
2687            /* Simulate the result of the function call below. */
2688            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2689              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2690            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2691              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2692            continue;
2693            }
2694          }
2695    
2696        /* Need to know if UTF-8 for printing data strings. */
2697    
2698        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2699        use_utf = (get_options & PCRE_UTF8) != 0;
2700    
2701      fclose(f);      fclose(f);
2702      goto SHOW_INFO;      goto SHOW_INFO;
2703      }      }
2704    
2705    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2706    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2707    
2708    delimiter = *p++;    delimiter = *p++;
2709    
# Line 1619  while (!done) Line 2769  while (!done)
2769        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2770    
2771        case '+':        case '+':
2772        if (do_showrest) do_showcaprest = 1; else do_showrest = 1;        if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2773        break;        break;
2774    
2775        case '=': do_allcaps = 1; break;        case '=': do_allcaps = 1; break;
2776        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2777        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2778        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1640  while (!done) Line 2790  while (!done)
2790        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2791  #endif  #endif
2792    
2793        case 'S':        case 'S':
2794        if (do_study == 0) do_study = 1; else        if (do_study == 0)
2795            {
2796            do_study = 1;
2797            if (*pp == '+')
2798              {
2799              if (*(++pp) == '+')
2800                {
2801                verify_jit = TRUE;
2802                pp++;
2803                }
2804              if (*pp >= '1' && *pp <= '7')
2805                study_options |= jit_study_bits[*pp++ - '1'];
2806              else
2807                study_options |= jit_study_bits[6];
2808              }
2809            }
2810          else
2811          {          {
2812          do_study = 0;          do_study = 0;
2813          no_force_study = 1;          no_force_study = 1;
2814          }          }
2815        break;        break;
2816    
2817        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
# Line 1653  while (!done) Line 2819  while (!done)
2819        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2820        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2821        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2822        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2823        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2824    
2825        case 'T':        case 'T':
# Line 1687  while (!done) Line 2853  while (!done)
2853          goto SKIP_DATA;          goto SKIP_DATA;
2854          }          }
2855        locale_set = 1;        locale_set = 1;
2856        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2857        pp = ppp;        pp = ppp;
2858        break;        break;
2859    
# Line 1700  while (!done) Line 2866  while (!done)
2866    
2867        case '<':        case '<':
2868          {          {
2869          if (strncmpic(pp, (uschar *)"JS>", 3) == 0)          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2870            {            {
2871            options |= PCRE_JAVASCRIPT_COMPAT;            options |= PCRE_JAVASCRIPT_COMPAT;
2872            pp += 3;            pp += 3;
# Line 1728  while (!done) Line 2894  while (!done)
2894    
2895    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2896    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2897    local character tables. */    local character tables. Neither does it have 16-bit support. */
2898    
2899  #if !defined NOPOSIX  #if !defined NOPOSIX
2900    if (posix || do_posix)    if (posix || do_posix)
# Line 1744  while (!done) Line 2910  while (!done)
2910      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;      if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2911      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;      if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2912    
2913        first_gotten_store = 0;
2914      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2915    
2916      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1763  while (!done) Line 2930  while (!done)
2930  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2931    
2932      {      {
2933      unsigned long int get_options;      /* In 16-bit mode, convert the input. */
2934    
2935    #ifdef SUPPORT_PCRE16
2936        if (use_pcre16)
2937          {
2938          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2939            {
2940            case -1:
2941            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2942              "converted to UTF-16\n");
2943            goto SKIP_DATA;
2944    
2945            case -2:
2946            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2947              "cannot be converted to UTF-16\n");
2948            goto SKIP_DATA;
2949    
2950            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2951            fprintf(outfile, "**Failed: character value greater than 0xffff "
2952              "cannot be converted to 16-bit in non-UTF mode\n");
2953            goto SKIP_DATA;
2954    
2955            default:
2956            break;
2957            }
2958          p = (pcre_uint8 *)buffer16;
2959          }
2960    #endif
2961    
2962        /* Compile many times when timing */
2963    
2964      if (timeit > 0)      if (timeit > 0)
2965        {        {
# Line 1772  while (!done) Line 2968  while (!done)
2968        clock_t start_time = clock();        clock_t start_time = clock();
2969        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2970          {          {
2971          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2972          if (re != NULL) free(re);          if (re != NULL) free(re);
2973          }          }
2974        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1781  while (!done) Line 2977  while (!done)
2977            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2978        }        }
2979    
2980      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2981        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2982    
2983      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2984      if non-interactive. */      if non-interactive. */
# Line 1812  while (!done) Line 3009  while (!done)
3009      within the regex; check for this so that we know how to process the data      within the regex; check for this so that we know how to process the data
3010      lines. */      lines. */
3011    
3012      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3013      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;        goto SKIP_DATA;
3014        if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
     /* Print information if required. There are now two info-returning  
     functions. The old one has a limited interface and returns only limited  
     data. Check that it agrees with the newer one. */  
   
     if (log_store)  
       fprintf(outfile, "Memory allocation (code space): %d\n",  
         (int)(gotten_store -  
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3015    
3016      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3017      and remember the store that was got. */      and remember the store that was got. */
3018    
3019      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3020      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3021    
3022        /* Output code size information if requested */
3023    
3024        if (log_store)
3025          fprintf(outfile, "Memory allocation (code space): %d\n",
3026            (int)(first_gotten_store -
3027                  sizeof(REAL_PCRE) -
3028                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3029    
3030      /* If -s or /S was present, study the regex to generate additional info to      /* If -s or /S was present, study the regex to generate additional info to
3031      help with the matching, unless the pattern has the SS option, which      help with the matching, unless the pattern has the SS option, which
3032      suppresses the effect of /S (used for a few test patterns where studying is      suppresses the effect of /S (used for a few test patterns where studying is
3033      never sensible). */      never sensible). */
3034    
3035      if (do_study || (force_study && !no_force_study))      if (do_study || (force_study >= 0 && !no_force_study))
3036        {        {
3037        if (timeit > 0)        if (timeit > 0)
3038          {          {
# Line 1844  while (!done) Line 3040  while (!done)
3040          clock_t time_taken;          clock_t time_taken;
3041          clock_t start_time = clock();          clock_t start_time = clock();
3042          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3043            extra = pcre_study(re, study_options, &error);            {
3044              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3045              }
3046          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3047          if (extra != NULL) free(extra);          if (extra != NULL)
3048              {
3049              PCRE_FREE_STUDY(extra);
3050              }
3051          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3052            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3053              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3054          }          }
3055        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3056        if (error != NULL)        if (error != NULL)
3057          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3058        else if (extra != NULL)        else if (extra != NULL)
3059            {
3060          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3061            if (log_store)
3062              {
3063              size_t jitsize;
3064              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3065                  jitsize != 0)
3066                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3067              }
3068            }
3069        }        }
3070    
3071      /* If /K was present, we set up for handling MARK data. */      /* If /K was present, we set up for handling MARK data. */
# Line 1871  while (!done) Line 3081  while (!done)
3081        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
3082        }        }
3083    
3084      /* If the 'F' option was present, we flip the bytes of all the integer      /* Extract and display information from the compiled data if required. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte =  
         (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte =  
         (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
     /* Extract information from the compiled data if required */  
3085    
3086      SHOW_INFO:      SHOW_INFO:
3087    
3088      if (do_debug)      if (do_debug)
3089        {        {
3090        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3091        pcre_printint(re, outfile, debug_lengths);        PCRE_PRINTINT(re, outfile, debug_lengths);
3092        }        }
3093    
3094      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 1923  while (!done) Line 3096  while (!done)
3096      if (do_showinfo)      if (do_showinfo)
3097        {        {
3098        unsigned long int all_options;        unsigned long int all_options;
 #if !defined NOINFOCHECK  
       int old_first_char, old_options, old_count;  
 #endif  
3099        int count, backrefmax, first_char, need_char, okpartial, jchanged,        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3100          hascrorlf;          hascrorlf;
3101        int nameentrysize, namecount;        int nameentrysize, namecount;
3102        const uschar *nametable;        const pcre_uint8 *nametable;
3103    
3104        new_info(re, NULL, PCRE_INFO_SIZE, &size);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3105        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3106        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3107        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3108        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3109        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3110        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3111        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3112        new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3113        new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);            new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3114        new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3115              != 0)
3116  #if !defined NOINFOCHECK          goto SKIP_DATA;
       old_count = pcre_info(re, &old_options, &old_first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3117    
3118        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3119          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1976  while (!done) Line 3128  while (!done)
3128          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3129          while (namecount-- > 0)          while (namecount-- > 0)
3130            {            {
3131            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3132              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3133              GET2(nametable, 0));  #else
3134              int imm2_size = IMM2_SIZE;
3135    #endif
3136              int length = (int)STRLEN(nametable + imm2_size);
3137              fprintf(outfile, "  ");
3138              PCHARSV(nametable, imm2_size, length, outfile);
3139              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3140    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3141              fprintf(outfile, "%3d\n", use_pcre16?
3142                 (int)(((PCRE_SPTR16)nametable)[0])
3143                :((int)nametable[0] << 8) | (int)nametable[1]);
3144              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3145    #else
3146              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3147    #ifdef SUPPORT_PCRE8
3148            nametable += nameentrysize;            nametable += nameentrysize;
3149    #else
3150              nametable += nameentrysize * 2;
3151    #endif
3152    #endif
3153            }            }
3154          }          }
3155    
3156        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3157        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
3158    
3159        all_options = ((real_pcre *)re)->options;        all_options = ((REAL_PCRE *)re)->options;
3160        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));        if (do_flip) all_options = swap_uint32(all_options);
3161    
3162        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3163          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
# Line 2003  while (!done) Line 3173  while (!done)
3173            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3174            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3175            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3176            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3177            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3178            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3179            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3180            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3181    
# Line 2047  while (!done) Line 3217  while (!done)
3217          }          }
3218        else        else
3219          {          {
3220          int ch = first_char & 255;          const char *caseless =
3221          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3222            "" : " (caseless)";            "" : " (caseless)";
3223          if (PRINTHEX(ch))  
3224            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3225              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3226          else          else
3227            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3228              fprintf(outfile, "First char = ");
3229              pchar(first_char, outfile);
3230              fprintf(outfile, "%s\n", caseless);
3231              }
3232          }          }
3233    
3234        if (need_char < 0)        if (need_char < 0)
# Line 2062  while (!done) Line 3237  while (!done)
3237          }          }
3238        else        else
3239          {          {
3240          int ch = need_char & 255;          const char *caseless =
3241          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3242            "" : " (caseless)";            "" : " (caseless)";
3243          if (PRINTHEX(ch))  
3244            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3245              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3246          else          else
3247            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3248              fprintf(outfile, "Need char = ");
3249              pchar(need_char, outfile);
3250              fprintf(outfile, "%s\n", caseless);
3251              }
3252          }          }
3253    
3254        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3255        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3256        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3257        flipped.) If study was forced by an external -s, don't show this        flipped.) If study was forced by an external -s, don't show this
3258        information unless -i or -d was also present. This means that, except        information unless -i or -d was also present. This means that, except
3259        when auto-callouts are involved, the output from runs with and without        when auto-callouts are involved, the output from runs with and without
3260        -s should be identical. */        -s should be identical. */
3261    
3262        if (do_study || (force_study && showinfo && !no_force_study))        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3263          {          {
3264          if (extra == NULL)          if (extra == NULL)
3265            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3266          else          else
3267            {            {
3268            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3269            int minlength;            int minlength;
3270    
3271            new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3272            fprintf(outfile, "Subject length lower bound = %d\n", minlength);              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3273    
3274            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
           if (start_bits == NULL)  
             fprintf(outfile, "No set of starting bytes\n");  
           else  
3275              {              {
3276              int i;              if (start_bits == NULL)
3277              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3278              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3279                {                {
3280                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3281                  int c = 24;
3282                  fprintf(outfile, "Starting byte set: ");
3283                  for (i = 0; i < 256; i++)
3284                  {                  {
3285                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
                   {  
                   fprintf(outfile, "%c ", i);  
                   c += 2;  
                   }  
                 else  
3286                    {                    {
3287                    fprintf(outfile, "\\x%02x ", i);                    if (c > 75)
3288                    c += 5;                      {
3289                        fprintf(outfile, "\n  ");
3290                        c = 2;
3291                        }
3292                      if (PRINTOK(i) && i != ' ')
3293                        {
3294                        fprintf(outfile, "%c ", i);
3295                        c += 2;
3296                        }
3297                      else
3298                        {
3299                        fprintf(outfile, "\\x%02x ", i);
3300                        c += 5;
3301                        }
3302                    }                    }
3303                  }                  }
3304                  fprintf(outfile, "\n");
3305                }                }
3306              fprintf(outfile, "\n");              }
3307              }
3308    
3309            /* Show this only if the JIT was set by /S, not by -s. */
3310    
3311            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3312              {
3313              int jit;
3314              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3315                {
3316                if (jit)
3317                  fprintf(outfile, "JIT study was successful\n");
3318                else
3319    #ifdef SUPPORT_JIT
3320                  fprintf(outfile, "JIT study was not successful\n");
3321    #else
3322                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3323    #endif
3324              }              }
3325            }            }
3326          }          }
# Line 2139  while (!done) Line 3339  while (!done)
3339          }          }
3340        else        else
3341          {          {
3342          uschar sbuf[8];          pcre_uint8 sbuf[8];
3343          sbuf[0] = (uschar)((true_size >> 24) & 255);  
3344          sbuf[1] = (uschar)((true_size >> 16) & 255);          if (do_flip) regexflip(re, extra);
3345          sbuf[2] = (uschar)((true_size >>  8) & 255);          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3346          sbuf[3] = (uschar)((true_size) & 255);          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3347            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3348          sbuf[4] = (uschar)((true_study_size >> 24) & 255);          sbuf[3] = (pcre_uint8)((true_size) & 255);
3349          sbuf[5] = (uschar)((true_study_size >> 16) & 255);          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3350          sbuf[6] = (uschar)((true_study_size >>  8) & 255);          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3351          sbuf[7] = (uschar)((true_study_size) & 255);          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3352            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3353    
3354          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3355              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 2158  while (!done) Line 3359  while (!done)
3359          else          else
3360            {            {
3361            fprintf(outfile, "Compiled pattern written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3362    
3363            /* If there is study data, write it, but verify the writing only            /* If there is study data, write it. */
3364            if the studying was requested by /S, not just by -s. */  
   
3365            if (extra != NULL)            if (extra != NULL)
3366              {              {
3367              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 2177  while (!done) Line 3377  while (!done)
3377          }          }
3378    
3379        new_free(re);        new_free(re);
3380        if (extra != NULL) new_free(extra);        if (extra != NULL)
3381            {
3382            PCRE_FREE_STUDY(extra);
3383            }
3384        if (locale_set)        if (locale_set)
3385          {          {
3386          new_free((void *)tables);          new_free((void *)tables);
# Line 2192  while (!done) Line 3395  while (!done)
3395    
3396    for (;;)    for (;;)
3397      {      {
3398      uschar *q;      pcre_uint8 *q;
3399      uschar *bptr;      pcre_uint8 *bptr;
3400      int *use_offsets = offsets;      int *use_offsets = offsets;
3401      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3402      int callout_data = 0;      int callout_data = 0;
# Line 2208  while (!done) Line 3411  while (!done)
3411      int start_offset_sign = 1;      int start_offset_sign = 1;
3412      int g_notempty = 0;      int g_notempty = 0;
3413      int use_dfa = 0;      int use_dfa = 0;
3414        int jit_was_used = 0;
     options = 0;  
3415    
3416      *copynames = 0;      *copynames = 0;
3417      *getnames = 0;      *getnames = 0;
3418    
3419      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3420      getnamesptr = getnames;      cn16ptr = copynames;
3421        gn16ptr = getnames;
3422    #endif
3423    #ifdef SUPPORT_PCRE8
3424        cn8ptr = copynames8;
3425        gn8ptr = getnames8;
3426    #endif
3427    
3428      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3429      first_callout = 1;      first_callout = 1;
3430      last_callout_mark = NULL;      last_callout_mark = NULL;
3431      callout_extra = 0;      callout_extra = 0;
3432      callout_count = 0;      callout_count = 0;
3433      callout_fail_count = 999999;      callout_fail_count = 999999;
3434      callout_fail_id = -1;      callout_fail_id = -1;
3435      show_malloc = 0;      show_malloc = 0;
3436        options = 0;
3437    
3438      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3439        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 2260  while (!done) Line 3469  while (!done)
3469        int i = 0;        int i = 0;
3470        int n = 0;        int n = 0;
3471    
3472        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3473          In non-UTF mode, allow the value of the byte to fall through to later,
3474          where values greater than 127 are turned into UTF-8 when running in
3475          16-bit mode. */
3476    
3477          if (c != '\\')
3478            {
3479            if (use_utf)
3480              {
3481              *q++ = c;
3482              continue;
3483              }
3484            }
3485    
3486          /* Handle backslash escapes */
3487    
3488          else switch ((c = *p++))
3489          {          {
3490          case 'a': c =    7; break;          case 'a': c =    7; break;
3491          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 2276  while (!done) Line 3501  while (!done)
3501          c -= '0';          c -= '0';
3502          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3503            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3504          break;          break;
3505    
3506          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3507          if (*p == '{')          if (*p == '{')
3508            {            {
3509            unsigned char *pt = p;            pcre_uint8 *pt = p;
3510            c = 0;            c = 0;
3511            while (isxdigit(*(++pt)))  
3512              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3513              when isxdigit() is a macro that refers to its argument more than
3514              once. This is banned by the C Standard, but apparently happens in at
3515              least one MacOS environment. */
3516    
3517              for (pt++; isxdigit(*pt); pt++)
3518                {
3519                if (++i == 9)
3520                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3521                                   "using only the first eight.\n");
3522                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3523                }
3524            if (*pt == '}')            if (*pt == '}')
3525              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             if (use_utf8)  
               {  
               utn = ord2utf8(c, buff8);  
               for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
               c = buff8[ii];   /* Last byte */  
               }  
             else  
              {  
              if (c > 255)  
                fprintf(outfile, "** Character \\x{%x} is greater than 255 and "  
                  "UTF-8 mode is not enabled.\n"  
                  "** Truncation will probably give the wrong result.\n", c);  
              }  
3526              p = pt + 1;              p = pt + 1;
3527              break;              break;
3528              }              }
3529            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3530            }            }
 #endif  
3531    
3532          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3533            allows UTF-8 characters to be constructed byte by byte, and also allows
3534            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3535            Otherwise, pass it down to later code so that it can be turned into
3536            UTF-8 when running in 16-bit mode. */
3537    
3538          c = 0;          c = 0;
3539          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3540            {            {
3541            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3542            p++;            p++;
3543            }            }
3544            if (use_utf)
3545              {
3546              *q++ = c;
3547              continue;
3548              }
3549          break;          break;
3550    
3551          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 2364  while (!done) Line 3578  while (!done)
3578            }            }
3579          else if (isalnum(*p))          else if (isalnum(*p))
3580            {            {
3581            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3582            }            }
3583          else if (*p == '+')          else if (*p == '+')
3584            {            {
# Line 2380  while (!done) Line 3587  while (!done)
3587            }            }
3588          else if (*p == '-')          else if (*p == '-')
3589            {            {
3590            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3591            p++;            p++;
3592            }            }
3593          else if (*p == '!')          else if (*p == '!')
# Line 2434  while (!done) Line 3641  while (!done)
3641            }            }
3642          else if (isalnum(*p))          else if (isalnum(*p))
3643            {            {
3644            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3645            while (isalnum(*p)) *npp++ = *p++;            }
3646            *npp++ = 0;          continue;
3647            *npp = 0;  
3648            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3649            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3650              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3651            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3652                && extra->executable_jit != NULL)
3653              {
3654              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3655              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3656              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3657            }            }
3658          continue;          continue;
3659    
# Line 2537  while (!done) Line 3749  while (!done)
3749            }            }
3750          continue;          continue;
3751          }          }
3752        *q++ = c;  
3753          /* We now have a character value in c that may be greater than 255. In
3754          16-bit mode, we always convert characters to UTF-8 so that values greater
3755          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3756          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3757          mode must have come from \x{...} or octal constructs because values from
3758          \x.. get this far only in non-UTF mode. */
3759    
3760    #if !defined NOUTF || defined SUPPORT_PCRE16
3761          if (use_pcre16 || use_utf)
3762            {
3763            pcre_uint8 buff8[8];
3764            int ii, utn;
3765            utn = ord2utf8(c, buff8);
3766            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3767            }
3768          else
3769    #endif
3770            {
3771            if (c > 255)
3772              {
3773              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3774                "and UTF-8 mode is not enabled.\n", c);
3775              fprintf(outfile, "** Truncation will probably give the wrong "
3776                "result.\n");
3777              }
3778            *q++ = c;
3779            }
3780        }        }
3781    
3782        /* Reached end of subject string */
3783    
3784      *q = 0;      *q = 0;
3785      len = (int)(q - dbuffer);      len = (int)(q - dbuffer);
3786    
# Line 2600  while (!done) Line 3842  while (!done)
3842            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3843              {              {
3844              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3845              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer, pmatch[i].rm_so,
3846                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3847              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3848              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3849                {                {
3850                fprintf(outfile, "%2d+ ", (int)i);                fprintf(outfile, "%2d+ ", (int)i);
3851                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer, pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3852                  outfile);                  outfile);
3853                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3854                }                }
# Line 2614  while (!done) Line 3856  while (!done)
3856            }            }
3857          }          }
3858        free(pmatch);        free(pmatch);
3859          goto NEXT_DATA;
3860        }        }
3861    
3862    #endif  /* !defined NOPOSIX */
3863    
3864      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3865    
3866      else  #ifdef SUPPORT_PCRE16
3867  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3868          {
3869          len = to16(TRUE, bptr, (((REAL_PCRE *)re)->options) & PCRE_UTF8, len);
3870          switch(len)
3871            {
3872            case -1:
3873            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3874              "converted to UTF-16\n");
3875            goto NEXT_DATA;
3876    
3877            case -2:
3878            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
3879              "cannot be converted to UTF-16\n");
3880            goto NEXT_DATA;
3881    
3882            case -3:
3883            fprintf(outfile, "**Failed: character value greater than 0xffff "
3884              "cannot be converted to 16-bit in non-UTF mode\n");
3885            goto NEXT_DATA;
3886    
3887            default:
3888            break;
3889            }
3890          bptr = (pcre_uint8 *)buffer16;
3891          }
3892    #endif
3893    
3894      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3895        {        {
# Line 2636  while (!done) Line 3906  while (!done)
3906            {            {
3907            int workspace[1000];            int workspace[1000];
3908            for (i = 0; i < timeitm; i++)            for (i = 0; i < timeitm; i++)
3909              count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,              {
3910                options | g_notempty, use_offsets, use_size_offsets, workspace,              PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3911                sizeof(workspace)/sizeof(int));                (options | g_notempty), use_offsets, use_size_offsets, workspace,
3912                  (sizeof(workspace)/sizeof(int)));
3913                }
3914            }            }
3915          else          else
3916  #endif  #endif
3917    
3918          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3919            count = pcre_exec(re, extra, (char *)bptr, len,            {
3920              start_offset, options | g_notempty, use_offsets, use_size_offsets);            PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3921                (options | g_notempty), use_offsets, use_size_offsets);
3922              }
3923          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3924          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3925            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2655  while (!done) Line 3928  while (!done)
3928    
3929        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
3930        varying limits in order to find the minimum value for the match limit and        varying limits in order to find the minimum value for the match limit and
3931        for the recursion limit. */        for the recursion limit. The match limits are relevant only to the normal
3932          running of pcre_exec(), so disable the JIT optimization. This makes it
3933          possible to run the same set of tests with and without JIT externally
3934          requested. */
3935    
3936        if (find_match_limit)        if (find_match_limit)
3937          {          {
# Line 2664  while (!done) Line 3940  while (!done)
3940            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3941            extra->flags = 0;            extra->flags = 0;
3942            }            }
3943            else extra->flags &= ~PCRE_EXTRA_EXECUTABLE_JIT;
3944    
3945          (void)check_match_limit(re, extra, bptr, len, start_offset,          (void)check_match_limit(re, extra, bptr, len, start_offset,
3946            options|g_notempty, use_offsets, use_size_offsets,            options|g_notempty, use_offsets, use_size_offsets,
# Line 2687  while (!done) Line 3964  while (!done)
3964            }            }
3965          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3966          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3967          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3968            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3969          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3970          }          }
# Line 2699  while (!done) Line 3976  while (!done)
3976        else if (all_use_dfa || use_dfa)        else if (all_use_dfa || use_dfa)
3977          {          {
3978          int workspace[1000];          int workspace[1000];
3979          count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset,
3980            options | g_notempty, use_offsets, use_size_offsets, workspace,            (options | g_notempty), use_offsets, use_size_offsets, workspace,
3981            sizeof(workspace)/sizeof(int));            (sizeof(workspace)/sizeof(int)));
3982          if (count == 0)          if (count == 0)
3983            {            {
3984            fprintf(outfile, "Matched, but too many subsidiary matches\n");            fprintf(outfile, "Matched, but too many subsidiary matches\n");
# Line 2712  while (!done) Line 3989  while (!done)
3989    
3990        else        else
3991          {          {
3992          count = pcre_exec(re, extra, (char *)bptr, len,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3993            start_offset, options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3994          if (count == 0)          if (count == 0)
3995            {            {
3996            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
3997            count = use_size_offsets/3;            count = use_size_offsets/3;
3998            }            }
3999          }          }
4000    
4001          if (extra != NULL && (extra->flags & PCRE_EXTRA_USED_JIT) != 0)
4002            jit_was_used = TRUE;
4003    
4004        /* Matched */        /* Matched */
4005    
4006        if (count >= 0)        if (count >= 0)
4007          {          {
4008          int i, maxcount;          int i, maxcount;
4009            void *cnptr, *gnptr;
4010    
4011  #if !defined NODFA  #if !defined NODFA
4012          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else          if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
# Line 2746  while (!done) Line 4027  while (!done)
4027              do_g = do_G = FALSE;        /* Break g/G loop */              do_g = do_G = FALSE;        /* Break g/G loop */
4028              }              }
4029            }            }
4030    
4031          /* do_allcaps requests showing of all captures in the pattern, to check          /* do_allcaps requests showing of all captures in the pattern, to check
4032          unset ones at the end. */          unset ones at the end. */
4033    
4034          if (do_allcaps)          if (do_allcaps)
4035            {            {
4036            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            if (new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) < 0)
4037            count++;   /* Allow for full match */              goto SKIP_DATA;
4038            if (count * 2 > use_size_offsets) count = use_size_offsets/2;            count++;   /* Allow for full match */
4039            }            if (count * 2 > use_size_offsets) count = use_size_offsets/2;
4040              }
4041    
4042          /* Output the captured substrings */          /* Output the captured substrings */
4043    
4044          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
4045            {            {
4046            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
4047              {              {
4048              if (use_offsets[i] != -1)              if (use_offsets[i] != -1)
4049                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4050                  use_offsets[i], i);                  use_offsets[i], i);
4051              if (use_offsets[i+1] != -1)              if (use_offsets[i+1] != -1)
4052                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",                fprintf(outfile, "ERROR: bad negative value %d for offset %d\n",
4053                  use_offsets[i+1], i+1);                  use_offsets[i+1], i+1);
4054              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
4055              }              }
4056            else            else
4057              {              {
4058              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
4059              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr, use_offsets[i],
4060                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
4061                if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4062              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4063              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
4064                {                {
4065                fprintf(outfile, "%2d+ ", i/2);                fprintf(outfile, "%2d+ ", i/2);
4066                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                PCHARSV(bptr, use_offsets[i+1], len - use_offsets[i+1],
4067                  outfile);                  outfile);
4068                fprintf(outfile, "\n");                fprintf(outfile, "\n");
4069                }                }
4070              }              }
4071            }            }
4072    
4073          if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);          if (markptr != NULL)
4074              {
4075              fprintf(outfile, "MK: ");
4076              PCHARSV(markptr, 0, -1, outfile);
4077              fprintf(outfile, "\n");
4078              }
4079    
4080          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4081            {            {
4082            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
4083              {              {
4084                int rc;
4085              char copybuffer[256];              char copybuffer[256];
4086              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              PCRE_COPY_SUBSTRING(rc, bptr, use_offsets, count, i,
4087                i, copybuffer, sizeof(copybuffer));                copybuffer, sizeof(copybuffer));
4088              if (rc < 0)              if (rc < 0)
4089                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
4090              else              else
4091                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);                {
4092                  fprintf(outfile, "%2dC ", i);
4093                  PCHARSV(copybuffer, 0, rc, outfile);
4094                  fprintf(outfile, " (%d)\n", rc);
4095                  }
4096              }              }
4097            }            }
4098    
4099          for (copynamesptr = copynames;          cnptr = copynames;
4100               *copynamesptr != 0;          for (;;)
              copynamesptr += (int)strlen((char*)copynamesptr) + 1)  
4101            {            {
4102              int rc;
4103            char copybuffer[256];            char copybuffer[256];
4104            int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,  
4105              count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));            if (use_pcre16)
4106                {
4107                if (*(pcre_uint16 *)cnptr == 0) break;
4108                }
4109              else
4110                {
4111                if (*(pcre_uint8 *)cnptr == 0) break;
4112                }
4113    
4114              PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4115                cnptr, copybuffer, sizeof(copybuffer));
4116    
4117            if (rc < 0)            if (rc < 0)
4118              fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);              {
4119                fprintf(outfile, "copy substring ");
4120                PCHARSV(cnptr, 0, -1, outfile);
4121                fprintf(outfile, " failed %d\n", rc);
4122                }
4123            else            else
4124              fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);              {
4125                fprintf(outfile, "  C ");
4126                PCHARSV(copybuffer, 0, rc, outfile);
4127                fprintf(outfile, " (%d) ", rc);
4128                PCHARSV(cnptr, 0, -1, outfile);
4129                putc('\n', outfile);
4130                }
4131    
4132              cnptr = (char *)cnptr + (STRLEN(cnptr) + 1) * CHAR_SIZE;
4133            }            }
4134    
4135          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
4136            {            {
4137            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
4138              {              {
4139                int rc;
4140              const char *substring;              const char *substring;
4141              int rc = pcre_get_substring((char *)bptr, use_offsets, count,              PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, &substring);
               i, &substring);  
4142              if (rc < 0)              if (rc < 0)
4143                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
4144              else              else
4145                {                {
4146                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG ", i);
4147                pcre_free_substring(substring);                PCHARSV(substring, 0, rc, outfile);
4148                  fprintf(outfile, " (%d)\n", rc);
4149                  PCRE_FREE_SUBSTRING(substring);
4150                }                }
4151              }              }
4152            }            }
4153    
4154          for (getnamesptr = getnames;          gnptr = getnames;
4155               *getnamesptr != 0;          for (;;)
              getnamesptr += (int)strlen((char*)getnamesptr) + 1)  
4156            {            {
4157              int rc;
4158            const char *substring;            const char *substring;
4159            int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,  
4160              count, (char *)getnamesptr, &substring);            if (use_pcre16)
4161                {
4162                if (*(pcre_uint16 *)gnptr == 0) break;
4163                }
4164              else
4165                {
4166                if (*(pcre_uint8 *)gnptr == 0) break;
4167                }
4168    
4169              PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, use_offsets, count,
4170                gnptr, &substring);
4171            if (rc < 0)            if (rc < 0)
4172              fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);              {
4173                fprintf(outfile, "get substring ");
4174                PCHARSV(gnptr, 0, -1, outfile);
4175                fprintf(outfile, " failed %d\n", rc);
4176                }
4177            else            else
4178              {              {
4179              fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);              fprintf(outfile, "  G ");
4180              pcre_free_substring(substring);              PCHARSV(substring, 0, rc, outfile);
4181                fprintf(outfile, " (%d) ", rc);
4182                PCHARSV(gnptr, 0, -1, outfile);
4183                PCRE_FREE_SUBSTRING(substring);
4184                putc('\n', outfile);
4185              }              }
4186    
4187              gnptr = (char *)gnptr + (STRLEN(gnptr) + 1) * CHAR_SIZE;
4188            }            }
4189    
4190          if (getlist)          if (getlist)
4191            {            {
4192              int rc;
4193            const char **stringlist;            const char **stringlist;
4194            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,            PCRE_GET_SUBSTRING_LIST(rc, bptr, use_offsets, count, &stringlist);
             &stringlist);  
4195            if (rc < 0)            if (rc < 0)
4196              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
4197            else            else
4198              {              {
4199              for (i = 0; i < count; i++)              for (i = 0; i < count; i++)
4200                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                {
4201                  fprintf(outfile, "%2dL ", i);
4202                  PCHARSV(stringlist[i], 0, -1, outfile);
4203                  putc('\n', outfile);
4204                  }
4205              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
4206                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
4207              /* free((void *)stringlist); */              PCRE_FREE_SUBSTRING_LIST(stringlist);
             pcre_free_substring_list(stringlist);  
4208              }              }
4209            }            }
4210          }          }
# Line 2873  while (!done) Line 4214  while (!done)
4214        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
4215          {          {
4216          if (markptr == NULL) fprintf(outfile, "Partial match");          if (markptr == NULL) fprintf(outfile, "Partial match");
4217            else fprintf(outfile, "Partial match, mark=%s", markptr);          else
4218              {
4219              fprintf(outfile, "Partial match, mark=");
4220              PCHARSV(markptr, 0, -1, outfile);
4221              }
4222          if (use_size_offsets > 1)          if (use_size_offsets > 1)
4223            {            {
4224            fprintf(outfile, ": ");            fprintf(outfile, ": ");
4225            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr, use_offsets[0], use_offsets[1] - use_offsets[0],
4226              outfile);              outfile);
4227            }            }
4228            if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4229          fprintf(outfile, "\n");          fprintf(outfile, "\n");
4230          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
4231          }          }
# Line 2894  while (!done) Line 4240  while (!done)
4240        terminated by CRLF, an advance of one character just passes the \r,        terminated by CRLF, an advance of one character just passes the \r,
4241        whereas we should prefer the longer newline sequence, as does the code in        whereas we should prefer the longer newline sequence, as does the code in
4242        pcre_exec(). Fudge the offset value to achieve this. We check for a        pcre_exec(). Fudge the offset value to achieve this. We check for a
4243        newline setting in the pattern; if none was set, use pcre_config() to        newline setting in the pattern; if none was set, use PCRE_CONFIG() to
4244        find the default.        find the default.
4245    
4246        Otherwise, in the case of UTF-8 matching, the advance must be one        Otherwise, in the case of UTF-8 matching, the advance must be one
# Line 2905  while (!done) Line 4251  while (!done)
4251          if (g_notempty != 0)          if (g_notempty != 0)
4252            {            {
4253            int onechar = 1;            int onechar = 1;
4254            unsigned int obits = ((real_pcre *)re)->options;            unsigned int obits = ((REAL_PCRE *)re)->options;
4255            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
4256            if ((obits & PCRE_NEWLINE_BITS) == 0)            if ((obits & PCRE_NEWLINE_BITS) == 0)
4257              {              {
4258              int d;              int d;
4259              (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);              (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &d);
4260              /* Note that these values are always the ASCII ones, even in              /* Note that these values are always the ASCII ones, even in
4261              EBCDIC environments. CR = 13, NL = 10. */              EBCDIC environments. CR = 13, NL = 10. */
4262              obits = (d == 13)? PCRE_NEWLINE_CR :              obits = (d == 13)? PCRE_NEWLINE_CR :
# Line 2924  while (!done) Line 4270  while (!done)
4270                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)                 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
4271                &&                &&
4272                start_offset < len - 1 &&                start_offset < len - 1 &&
4273                bptr[start_offset] == '\r' &&  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
4274                bptr[start_offset+1] == '\n')                (use_pcre16?
4275                       ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4276                    && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4277                  :
4278                       bptr[start_offset] == '\r'
4279                    && bptr[start_offset + 1] == '\n')
4280    #elif defined SUPPORT_PCRE16
4281                     ((PCRE_SPTR16)bptr)[start_offset] == '\r'
4282                  && ((PCRE_SPTR16)bptr)[start_offset + 1] == '\n'
4283    #else
4284                     bptr[start_offset] == '\r'
4285                  && bptr[start_offset + 1] == '\n'
4286    #endif
4287                  )
4288              onechar++;              onechar++;
4289            else if (use_utf8)            else if (use_utf)
4290              {              {
4291              while (start_offset + onechar < len)              while (start_offset + onechar < len)
4292                {                {
# Line 2940  while (!done) Line 4299  while (!done)
4299          else          else
4300            {            {
4301            switch(count)            switch(count)
4302              {              {
4303              case PCRE_ERROR_NOMATCH:              case PCRE_ERROR_NOMATCH:
4304              if (gmatched == 0)              if (gmatched == 0)
4305                {                {
4306                if (markptr == NULL) fprintf(outfile, "No match\n");                if (markptr == NULL)
4307                  else fprintf(outfile, "No match, mark = %s\n", markptr);                  {
4308                    fprintf(outfile, "No match");
4309                    }
4310                  else
4311                    {
4312                    fprintf(outfile, "No match, mark = ");
4313                    PCHARSV(markptr, 0, -1, outfile);
4314                    }
4315                  if (verify_jit && jit_was_used) fprintf(outfile, " (JIT)");
4316                  putc('\n', outfile);
4317                }                }
4318              break;              break;
4319    
4320              case PCRE_ERROR_BADUTF8:              case PCRE_ERROR_BADUTF8:
4321              case PCRE_ERROR_SHORTUTF8:              case PCRE_ERROR_SHORTUTF8:
4322              fprintf(outfile, "Error %d (%s UTF-8 string)", count,              fprintf(outfile, "Error %d (%s UTF-%s string)", count,
4323                (count == PCRE_ERROR_BADUTF8)? "bad" : "short");                (count == PCRE_ERROR_BADUTF8)? "bad" : "short",
4324                  use_pcre16? "16" : "8");
4325              if (use_size_offsets >= 2)              if (use_size_offsets >= 2)
4326                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],                fprintf(outfile, " offset=%d reason=%d", use_offsets[0],
4327                  use_offsets[1]);                  use_offsets[1]);
4328              fprintf(outfile, "\n");              fprintf(outfile, "\n");
4329              break;              break;
4330    
4331                case PCRE_ERROR_BADUTF8_OFFSET:
4332                fprintf(outfile, "Error %d (bad UTF-%s offset)\n", count,
4333                  use_pcre16? "16" : "8");
4334                break;
4335    
4336              default:              default:
4337              if (count < 0 && (-count) < sizeof(errtexts)/sizeof(const char *))              if (count < 0 &&
4338                    (-count) < (int)(sizeof(errtexts)/sizeof(const char *)))
4339                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);                fprintf(outfile, "Error %d (%s)\n", count, errtexts[-count]);
4340              else              else
4341                fprintf(outfile, "Error %d (Unexpected value)\n", count);                fprintf(outfile, "Error %d (Unexpected value)\n", count);
4342              break;              break;
4343              }              }
4344    
4345            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
4346            }            }
4347          }          }
# Line 2998  while (!done) Line 4373  while (!done)
4373    
4374        else        else
4375          {          {
4376          bptr += use_offsets[1];          bptr += use_offsets[1] * CHAR_SIZE;
4377          len -= use_offsets[1];          len -= use_offsets[1];
4378          }          }
4379        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
# Line 3013  while (!done) Line 4388  while (!done)
4388  #endif  #endif
4389    
4390    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
4391    if (extra != NULL) new_free(extra);    if (extra != NULL)
4392        {
4393        PCRE_FREE_STUDY(extra);
4394        }
4395    if (locale_set)    if (locale_set)
4396      {      {
4397      new_free((void *)tables);      new_free((void *)tables);
4398      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
4399      locale_set = 0;      locale_set = 0;
4400      }      }
4401      if (jit_stack != NULL)
4402        {
4403        PCRE_JIT_STACK_FREE(jit_stack);
4404        jit_stack = NULL;
4405        }
4406    }    }
4407    
4408  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
# Line 3034  free(dbuffer); Line 4417  free(dbuffer);
4417  free(pbuffer);  free(pbuffer);
4418  free(offsets);  free(offsets);
4419    
4420    #ifdef SUPPORT_PCRE16
4421    if (buffer16 != NULL) free(buffer16);
4422    #endif
4423    
4424  return yield;  return yield;
4425  }  }
4426    

Legend:
Removed from v.645  
changed lines
  Added in v.923

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12