/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC revision 923 by ph10, Tue Feb 21 13:25:05 2012 UTC
# Line 1  Line 1 
1  /*************************************************  /*************************************************
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 35  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
37  */  */
38    
39    /* This program now supports the testing of both the 8-bit and 16-bit PCRE
40    libraries in a single program. This is different from the modules such as
41    pcre_compile.c in the library itself, which are compiled separately for each
42    mode. If both modes are enabled, for example, pcre_compile.c is compiled twice
43    (the second time with COMPILE_PCRE16 defined). By contrast, pcretest.c is
44    compiled only once. Therefore, it must not make use of any of the macros from
45    pcre_internal.h that depend on COMPILE_PCRE8 or COMPILE_PCRE16. It does,
46    however, make use of SUPPORT_PCRE8 and SUPPORT_PCRE16 to ensure that it calls
47    only supported library functions. */
48    
49    
50    #ifdef HAVE_CONFIG_H
51    #include "config.h"
52    #endif
53    
54  #include <ctype.h>  #include <ctype.h>
55  #include <stdio.h>  #include <stdio.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 59  POSSIBILITY OF SUCH DAMAGE.
59  #include <locale.h>  #include <locale.h>
60  #include <errno.h>  #include <errno.h>
61    
62    #ifdef SUPPORT_LIBREADLINE
63    #ifdef HAVE_UNISTD_H
64    #include <unistd.h>
65    #endif
66    #include <readline/readline.h>
67    #include <readline/history.h>
68    #endif
69    
70    
71  /* A number of things vary for Windows builds. Originally, pcretest opened its  /* A number of things vary for Windows builds. Originally, pcretest opened its
72  input and output without "b"; then I was told that "b" was needed in some  input and output without "b"; then I was told that "b" was needed in some
# Line 59  input mode under Windows. */ Line 82  input mode under Windows. */
82  #define INPUT_MODE   "r"  #define INPUT_MODE   "r"
83  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
84    
85    #ifndef isatty
86    #define isatty _isatty         /* This is what Windows calls them, I'm told, */
87    #endif                         /* though in some environments they seem to   */
88                                   /* be already defined, hence the #ifndefs.    */
89    #ifndef fileno
90    #define fileno _fileno
91    #endif
92    
93    /* A user sent this fix for Borland Builder 5 under Windows. */
94    
95    #ifdef __BORLANDC__
96    #define _setmode(handle, mode) setmode(handle, mode)
97    #endif
98    
99    /* Not Windows */
100    
101  #else  #else
102  #include <sys/time.h>          /* These two includes are needed */  #include <sys/time.h>          /* These two includes are needed */
103  #include <sys/resource.h>      /* for setrlimit(). */  #include <sys/resource.h>      /* for setrlimit(). */
# Line 66  input mode under Windows. */ Line 105  input mode under Windows. */
105  #define OUTPUT_MODE  "wb"  #define OUTPUT_MODE  "wb"
106  #endif  #endif
107    
108    #define PRIV(name) name
109    
110  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* We have to include pcre_internal.h because we need the internal info for
111    displaying the results of pcre_study() and we also need to know about the
112  /* We include pcre_internal.h because we need the internal info for displaying  internal macros, structures, and other internal data values; pcretest has
113  the results of pcre_study() and we also need to know about the internal  "inside information" compared to a program that strictly follows the PCRE API.
114  macros, structures, and other internal data values; pcretest has "inside  
115  information" compared to a program that strictly follows the PCRE API. */  Although pcre_internal.h does itself include pcre.h, we explicitly include it
116    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
117    appropriately for an application, not for building PCRE. */
118    
119    #include "pcre.h"
120    
121    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8
122    /* Configure internal macros to 16 bit mode. */
123    #define COMPILE_PCRE16
124    #endif
125    
126  #include "pcre_internal.h"  #include "pcre_internal.h"
127    
128  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* The pcre_printint() function, which prints the internal form of a compiled
129  two copies, we include the source file here, changing the names of the external  regex, is held in a separate file so that (a) it can be compiled in either
130  symbols to prevent clashes. */  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
131    when that is compiled in debug mode. */
 #define _pcre_utf8_table1      utf8_table1  
 #define _pcre_utf8_table1_size utf8_table1_size  
 #define _pcre_utf8_table2      utf8_table2  
 #define _pcre_utf8_table3      utf8_table3  
 #define _pcre_utf8_table4      utf8_table4  
 #define _pcre_utt              utt  
 #define _pcre_utt_size         utt_size  
 #define _pcre_OP_lengths       OP_lengths  
132    
133  #include "pcre_tables.c"  #ifdef SUPPORT_PCRE8
134    void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
135    #endif
136    #ifdef SUPPORT_PCRE16
137    void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
138    #endif
139    
140  /* We also need the pcre_printint() function for printing out compiled  /* We need access to some of the data tables that PCRE uses. So as not to have
141  patterns. This function is in a separate file so that it can be included in  to keep two copies, we include the source file here, changing the names of the
142  pcre_compile.c when that module is compiled with debugging enabled.  external symbols to prevent clashes. */
143    
144  The definition of the macro PRINTABLE, which determines whether to print an  #define PCRE_INCLUDED
145    
146    #include "pcre_tables.c"
147    
148    /* The definition of the macro PRINTABLE, which determines whether to print an
149  output character as-is or as a hex value when showing compiled patterns, is  output character as-is or as a hex value when showing compiled patterns, is
150  contained in this file. We use it here also, in cases when the locale has not  the same as in the printint.src file. We use it here in cases when the locale
151  been explicitly changed, so as to get consistent output from systems that  has not been explicitly changed, so as to get consistent output from systems
152  differ in their output from isprint() even in the "C" locale. */  that differ in their output from isprint() even in the "C" locale. */
153    
154  #include "pcre_printint.src"  #ifdef EBCDIC
155    #define PRINTABLE(c) ((c) >= 64 && (c) < 255)
156    #else
157    #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
158    #endif
159    
160  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
161    
162    /* Posix support is disabled in 16 bit only mode. */
163    #if defined SUPPORT_PCRE16 && !defined SUPPORT_PCRE8 && !defined NOPOSIX
164    #define NOPOSIX
165    #endif
166    
167  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
168  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 114  Makefile. */ Line 172  Makefile. */
172  #include "pcreposix.h"  #include "pcreposix.h"
173  #endif  #endif
174    
175  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, originally for the benefit of a version that was
176  build pcretest without support for UTF8 (define NOUTF8), without the interface  imported into Exim, to build pcretest without support for UTF8 or UTF16 (define
177  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  NOUTF), or without the interface to the DFA matcher (NODFA). In fact, we
178  function (define NOINFOCHECK). */  automatically cut out the UTF support if PCRE is built without it. */
179    
180    #ifndef SUPPORT_UTF
181    #ifndef NOUTF
182    #define NOUTF
183    #endif
184    #endif
185    
186    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
187    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
188    only from one place and is handled differently). I couldn't dream up any way of
189    using a single macro to do this in a generic way, because of the many different
190    argument requirements. We know that at least one of SUPPORT_PCRE8 and
191    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
192    use these in the definitions of generic macros.
193    
194    **** Special note about the PCHARSxxx macros: the address of the string to be
195    printed is always given as two arguments: a base address followed by an offset.
196    The base address is cast to the correct data size for 8 or 16 bit data; the
197    offset is in units of this size. If the string were given as base+offset in one
198    argument, the casting might be incorrectly applied. */
199    
200    #ifdef SUPPORT_PCRE8
201    
202    #define PCHARS8(lv, p, offset, len, f) \
203      lv = pchars((pcre_uint8 *)(p) + offset, len, f)
204    
205    #define PCHARSV8(p, offset, len, f) \
206      (void)pchars((pcre_uint8 *)(p) + offset, len, f)
207    
208    #define READ_CAPTURE_NAME8(p, cn8, cn16, re) \
209      p = read_capture_name8(p, cn8, re)
210    
211    #define STRLEN8(p) ((int)strlen((char *)p))
212    
213    #define SET_PCRE_CALLOUT8(callout) \
214      pcre_callout = callout
215    
216    #define PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata) \
217       pcre_assign_jit_stack(extra, callback, userdata)
218    
219    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
220      re = pcre_compile((char *)pat, options, error, erroffset, tables)
221    
222    #define PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
223        namesptr, cbuffer, size) \
224      rc = pcre_copy_named_substring(re, (char *)bptr, offsets, count, \
225        (char *)namesptr, cbuffer, size)
226    
227    #define PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size) \
228      rc = pcre_copy_substring((char *)bptr, offsets, count, i, cbuffer, size)
229    
230    #define PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
231        offsets, size_offsets, workspace, size_workspace) \
232      count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset, options, \
233        offsets, size_offsets, workspace, size_workspace)
234    
235    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
236        offsets, size_offsets) \
237      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
238        offsets, size_offsets)
239    
240    #define PCRE_FREE_STUDY8(extra) \
241      pcre_free_study(extra)
242    
243    #define PCRE_FREE_SUBSTRING8(substring) \
244      pcre_free_substring(substring)
245    
246    #define PCRE_FREE_SUBSTRING_LIST8(listptr) \
247      pcre_free_substring_list(listptr)
248    
249    #define PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
250        getnamesptr, subsptr) \
251      rc = pcre_get_named_substring(re, (char *)bptr, offsets, count, \
252        (char *)getnamesptr, subsptr)
253    
254    #define PCRE_GET_STRINGNUMBER8(n, rc, ptr) \
255      n = pcre_get_stringnumber(re, (char *)ptr)
256    
257    #define PCRE_GET_SUBSTRING8(rc, bptr, offsets, count, i, subsptr) \
258      rc = pcre_get_substring((char *)bptr, offsets, count, i, subsptr)
259    
260    #define PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr) \
261      rc = pcre_get_substring_list((const char *)bptr, offsets, count, listptr)
262    
263    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables) \
264      rc = pcre_pattern_to_host_byte_order(re, extra, tables)
265    
266    #define PCRE_PRINTINT8(re, outfile, debug_lengths) \
267      pcre_printint(re, outfile, debug_lengths)
268    
269    #define PCRE_STUDY8(extra, re, options, error) \
270      extra = pcre_study(re, options, error)
271    
272    #define PCRE_JIT_STACK_ALLOC8(startsize, maxsize) \
273      pcre_jit_stack_alloc(startsize, maxsize)
274    
275    #define PCRE_JIT_STACK_FREE8(stack) \
276      pcre_jit_stack_free(stack)
277    
278    #endif /* SUPPORT_PCRE8 */
279    
280    /* -----------------------------------------------------------*/
281    
282    #ifdef SUPPORT_PCRE16
283    
284    #define PCHARS16(lv, p, offset, len, f) \
285      lv = pchars16((PCRE_SPTR16)(p) + offset, len, f)
286    
287    #define PCHARSV16(p, offset, len, f) \
288      (void)pchars16((PCRE_SPTR16)(p) + offset, len, f)
289    
290    #define READ_CAPTURE_NAME16(p, cn8, cn16, re) \
291      p = read_capture_name16(p, cn16, re)
292    
293    #define STRLEN16(p) ((int)strlen16((PCRE_SPTR16)p))
294    
295    #define SET_PCRE_CALLOUT16(callout) \
296      pcre16_callout = (int (*)(pcre16_callout_block *))callout
297    
298    #define PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata) \
299      pcre16_assign_jit_stack((pcre16_extra *)extra, \
300        (pcre16_jit_callback)callback, userdata)
301    
302    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
303      re = (pcre *)pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, \
304        tables)
305    
306    #define PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
307        namesptr, cbuffer, size) \
308      rc = pcre16_copy_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
309        count, (PCRE_SPTR16)namesptr, (PCRE_UCHAR16 *)cbuffer, size/2)
310    
311    #define PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size) \
312      rc = pcre16_copy_substring((PCRE_SPTR16)bptr, offsets, count, i, \
313        (PCRE_UCHAR16 *)cbuffer, size/2)
314    
315    #define PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
316        offsets, size_offsets, workspace, size_workspace) \
317      count = pcre16_dfa_exec((pcre16 *)re, (pcre16_extra *)extra, \
318        (PCRE_SPTR16)bptr, len, start_offset, options, offsets, size_offsets, \
319        workspace, size_workspace)
320    
321    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
322        offsets, size_offsets) \
323      count = pcre16_exec((pcre16 *)re, (pcre16_extra *)extra, (PCRE_SPTR16)bptr, \
324        len, start_offset, options, offsets, size_offsets)
325    
326    #define PCRE_FREE_STUDY16(extra) \
327      pcre16_free_study((pcre16_extra *)extra)
328    
329    #define PCRE_FREE_SUBSTRING16(substring) \
330      pcre16_free_substring((PCRE_SPTR16)substring)
331    
332    #define PCRE_FREE_SUBSTRING_LIST16(listptr) \
333      pcre16_free_substring_list((PCRE_SPTR16 *)listptr)
334    
335    #define PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
336        getnamesptr, subsptr) \
337      rc = pcre16_get_named_substring((pcre16 *)re, (PCRE_SPTR16)bptr, offsets, \
338        count, (PCRE_SPTR16)getnamesptr, (PCRE_SPTR16 *)(void*)subsptr)
339    
340    #define PCRE_GET_STRINGNUMBER16(n, rc, ptr) \
341      n = pcre16_get_stringnumber(re, (PCRE_SPTR16)ptr)
342    
343    #define PCRE_GET_SUBSTRING16(rc, bptr, offsets, count, i, subsptr) \
344      rc = pcre16_get_substring((PCRE_SPTR16)bptr, offsets, count, i, \
345        (PCRE_SPTR16 *)(void*)subsptr)
346    
347    #define PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr) \
348      rc = pcre16_get_substring_list((PCRE_SPTR16)bptr, offsets, count, \
349        (PCRE_SPTR16 **)(void*)listptr)
350    
351    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables) \
352      rc = pcre16_pattern_to_host_byte_order((pcre16 *)re, (pcre16_extra *)extra, \
353        tables)
354    
355    #define PCRE_PRINTINT16(re, outfile, debug_lengths) \
356      pcre16_printint(re, outfile, debug_lengths)
357    
358    #define PCRE_STUDY16(extra, re, options, error) \
359      extra = (pcre_extra *)pcre16_study((pcre16 *)re, options, error)
360    
361    #define PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
362      (pcre_jit_stack *)pcre16_jit_stack_alloc(startsize, maxsize)
363    
364    #define PCRE_JIT_STACK_FREE16(stack) \
365      pcre16_jit_stack_free((pcre16_jit_stack *)stack)
366    
367    #endif /* SUPPORT_PCRE16 */
368    
369    
370    /* ----- Both modes are supported; a runtime test is needed, except for
371    pcre_config(), and the JIT stack functions, when it doesn't matter which
372    version is called. ----- */
373    
374    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
375    
376    #define CHAR_SIZE (use_pcre16? 2:1)
377    
378    #define PCHARS(lv, p, offset, len, f) \
379      if (use_pcre16) \
380        PCHARS16(lv, p, offset, len, f); \
381      else \
382        PCHARS8(lv, p, offset, len, f)
383    
384    #define PCHARSV(p, offset, len, f) \
385      if (use_pcre16) \
386        PCHARSV16(p, offset, len, f); \
387      else \
388        PCHARSV8(p, offset, len, f)
389    
390    #define READ_CAPTURE_NAME(p, cn8, cn16, re) \
391      if (use_pcre16) \
392        READ_CAPTURE_NAME16(p, cn8, cn16, re); \
393      else \
394        READ_CAPTURE_NAME8(p, cn8, cn16, re)
395    
396    #define SET_PCRE_CALLOUT(callout) \
397      if (use_pcre16) \
398        SET_PCRE_CALLOUT16(callout); \
399      else \
400        SET_PCRE_CALLOUT8(callout)
401    
402    #define STRLEN(p) (use_pcre16? STRLEN16(p) : STRLEN8(p))
403    
404    #define PCRE_ASSIGN_JIT_STACK(extra, callback, userdata) \
405      if (use_pcre16) \
406        PCRE_ASSIGN_JIT_STACK16(extra, callback, userdata); \
407      else \
408        PCRE_ASSIGN_JIT_STACK8(extra, callback, userdata)
409    
410    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
411      if (use_pcre16) \
412        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
413      else \
414        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
415    
416    #define PCRE_CONFIG pcre_config
417    
418    #define PCRE_COPY_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
419        namesptr, cbuffer, size) \
420      if (use_pcre16) \
421        PCRE_COPY_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
422          namesptr, cbuffer, size); \
423      else \
424        PCRE_COPY_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
425          namesptr, cbuffer, size)
426    
427    #define PCRE_COPY_SUBSTRING(rc, bptr, offsets, count, i, cbuffer, size) \
428      if (use_pcre16) \
429        PCRE_COPY_SUBSTRING16(rc, bptr, offsets, count, i, cbuffer, size); \
430      else \
431        PCRE_COPY_SUBSTRING8(rc, bptr, offsets, count, i, cbuffer, size)
432    
433    #define PCRE_DFA_EXEC(count, re, extra, bptr, len, start_offset, options, \
434        offsets, size_offsets, workspace, size_workspace) \
435      if (use_pcre16) \
436        PCRE_DFA_EXEC16(count, re, extra, bptr, len, start_offset, options, \
437          offsets, size_offsets, workspace, size_workspace); \
438      else \
439        PCRE_DFA_EXEC8(count, re, extra, bptr, len, start_offset, options, \
440          offsets, size_offsets, workspace, size_workspace)
441    
442    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
443        offsets, size_offsets) \
444      if (use_pcre16) \
445        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
446          offsets, size_offsets); \
447      else \
448        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
449          offsets, size_offsets)
450    
451    #define PCRE_FREE_STUDY(extra) \
452      if (use_pcre16) \
453        PCRE_FREE_STUDY16(extra); \
454      else \
455        PCRE_FREE_STUDY8(extra)
456    
457    #define PCRE_FREE_SUBSTRING(substring) \
458      if (use_pcre16) \
459        PCRE_FREE_SUBSTRING16(substring); \
460      else \
461        PCRE_FREE_SUBSTRING8(substring)
462    
463    #define PCRE_FREE_SUBSTRING_LIST(listptr) \
464      if (use_pcre16) \
465        PCRE_FREE_SUBSTRING_LIST16(listptr); \
466      else \
467        PCRE_FREE_SUBSTRING_LIST8(listptr)
468    
469    #define PCRE_GET_NAMED_SUBSTRING(rc, re, bptr, offsets, count, \
470        getnamesptr, subsptr) \
471      if (use_pcre16) \
472        PCRE_GET_NAMED_SUBSTRING16(rc, re, bptr, offsets, count, \
473          getnamesptr, subsptr); \
474      else \
475        PCRE_GET_NAMED_SUBSTRING8(rc, re, bptr, offsets, count, \
476          getnamesptr, subsptr)
477    
478    #define PCRE_GET_STRINGNUMBER(n, rc, ptr) \
479      if (use_pcre16) \
480        PCRE_GET_STRINGNUMBER16(n, rc, ptr); \
481      else \
482        PCRE_GET_STRINGNUMBER8(n, rc, ptr)
483    
484    #define PCRE_GET_SUBSTRING(rc, bptr, use_offsets, count, i, subsptr) \
485      if (use_pcre16) \
486        PCRE_GET_SUBSTRING16(rc, bptr, use_offsets, count, i, subsptr); \
487      else \
488        PCRE_GET_SUBSTRING8(rc, bptr, use_offsets, count, i, subsptr)
489    
490    #define PCRE_GET_SUBSTRING_LIST(rc, bptr, offsets, count, listptr) \
491      if (use_pcre16) \
492        PCRE_GET_SUBSTRING_LIST16(rc, bptr, offsets, count, listptr); \
493      else \
494        PCRE_GET_SUBSTRING_LIST8(rc, bptr, offsets, count, listptr)
495    
496    #define PCRE_JIT_STACK_ALLOC(startsize, maxsize) \
497      (use_pcre16 ? \
498         PCRE_JIT_STACK_ALLOC16(startsize, maxsize) \
499        :PCRE_JIT_STACK_ALLOC8(startsize, maxsize))
500    
501    #define PCRE_JIT_STACK_FREE(stack) \
502      if (use_pcre16) \
503        PCRE_JIT_STACK_FREE16(stack); \
504      else \
505        PCRE_JIT_STACK_FREE8(stack)
506    
507    #define PCRE_MAKETABLES \
508      (use_pcre16? pcre16_maketables() : pcre_maketables())
509    
510    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, tables) \
511      if (use_pcre16) \
512        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(rc, re, extra, tables); \
513      else \
514        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(rc, re, extra, tables)
515    
516    #define PCRE_PRINTINT(re, outfile, debug_lengths) \
517      if (use_pcre16) \
518        PCRE_PRINTINT16(re, outfile, debug_lengths); \
519      else \
520        PCRE_PRINTINT8(re, outfile, debug_lengths)
521    
522    #define PCRE_STUDY(extra, re, options, error) \
523      if (use_pcre16) \
524        PCRE_STUDY16(extra, re, options, error); \
525      else \
526        PCRE_STUDY8(extra, re, options, error)
527    
528    /* ----- Only 8-bit mode is supported ----- */
529    
530    #elif defined SUPPORT_PCRE8
531    #define CHAR_SIZE                 1
532    #define PCHARS                    PCHARS8
533    #define PCHARSV                   PCHARSV8
534    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME8
535    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT8
536    #define STRLEN                    STRLEN8
537    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK8
538    #define PCRE_COMPILE              PCRE_COMPILE8
539    #define PCRE_CONFIG               pcre_config
540    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING8
541    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING8
542    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC8
543    #define PCRE_EXEC                 PCRE_EXEC8
544    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY8
545    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING8
546    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST8
547    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING8
548    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER8
549    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING8
550    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST8
551    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC8
552    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE8
553    #define PCRE_MAKETABLES           pcre_maketables()
554    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
555    #define PCRE_PRINTINT             PCRE_PRINTINT8
556    #define PCRE_STUDY                PCRE_STUDY8
557    
558    /* ----- Only 16-bit mode is supported ----- */
559    
560    #else
561    #define CHAR_SIZE                 2
562    #define PCHARS                    PCHARS16
563    #define PCHARSV                   PCHARSV16
564    #define READ_CAPTURE_NAME         READ_CAPTURE_NAME16
565    #define SET_PCRE_CALLOUT          SET_PCRE_CALLOUT16
566    #define STRLEN                    STRLEN16
567    #define PCRE_ASSIGN_JIT_STACK     PCRE_ASSIGN_JIT_STACK16
568    #define PCRE_COMPILE              PCRE_COMPILE16
569    #define PCRE_CONFIG               pcre16_config
570    #define PCRE_COPY_NAMED_SUBSTRING PCRE_COPY_NAMED_SUBSTRING16
571    #define PCRE_COPY_SUBSTRING       PCRE_COPY_SUBSTRING16
572    #define PCRE_DFA_EXEC             PCRE_DFA_EXEC16
573    #define PCRE_EXEC                 PCRE_EXEC16
574    #define PCRE_FREE_STUDY           PCRE_FREE_STUDY16
575    #define PCRE_FREE_SUBSTRING       PCRE_FREE_SUBSTRING16
576    #define PCRE_FREE_SUBSTRING_LIST  PCRE_FREE_SUBSTRING_LIST16
577    #define PCRE_GET_NAMED_SUBSTRING  PCRE_GET_NAMED_SUBSTRING16
578    #define PCRE_GET_STRINGNUMBER     PCRE_GET_STRINGNUMBER16
579    #define PCRE_GET_SUBSTRING        PCRE_GET_SUBSTRING16
580    #define PCRE_GET_SUBSTRING_LIST   PCRE_GET_SUBSTRING_LIST16
581    #define PCRE_JIT_STACK_ALLOC      PCRE_JIT_STACK_ALLOC16
582    #define PCRE_JIT_STACK_FREE       PCRE_JIT_STACK_FREE16
583    #define PCRE_MAKETABLES           pcre16_maketables()
584    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
585    #define PCRE_PRINTINT             PCRE_PRINTINT16
586    #define PCRE_STUDY                PCRE_STUDY16
587    #endif
588    
589    /* ----- End of mode-specific function call macros ----- */
590    
591    
592  /* Other parameters */  /* Other parameters */
# Line 142  static int callout_count; Line 611  static int callout_count;
611  static int callout_extra;  static int callout_extra;
612  static int callout_fail_count;  static int callout_fail_count;
613  static int callout_fail_id;  static int callout_fail_id;
614    static int debug_lengths;
615  static int first_callout;  static int first_callout;
616  static int locale_set = 0;  static int locale_set = 0;
617  static int show_malloc;  static int show_malloc;
618  static int use_utf8;  static int use_utf;
619  static size_t gotten_store;  static size_t gotten_store;
620    static size_t first_gotten_store = 0;
621    static const unsigned char *last_callout_mark = NULL;
622    
623  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
624    
625  static int buffer_size = 50000;  static int buffer_size = 50000;
626  static uschar *buffer = NULL;  static pcre_uint8 *buffer = NULL;
627  static uschar *dbuffer = NULL;  static pcre_uint8 *dbuffer = NULL;
628  static uschar *pbuffer = NULL;  static pcre_uint8 *pbuffer = NULL;
629    
630    /* Another buffer is needed for translation to 16-bit character strings. It
631    will be obtained and extended as required. */
632    
633    #ifdef SUPPORT_PCRE16
634    static int buffer16_size = 0;
635    static pcre_uint16 *buffer16 = NULL;
636    
637    #ifdef SUPPORT_PCRE8
638    
639    /* We need the table of operator lengths that is used for 16-bit compiling, in
640    order to swap bytes in a pattern for saving/reloading testing. Luckily, the
641    data is defined as a macro. However, we must ensure that LINK_SIZE is adjusted
642    appropriately for the 16-bit world. Just as a safety check, make sure that
643    COMPILE_PCRE16 is *not* set. */
644    
645    #ifdef COMPILE_PCRE16
646    #error COMPILE_PCRE16 must not be set when compiling pcretest.c
647    #endif
648    
649    #if LINK_SIZE == 2
650    #undef LINK_SIZE
651    #define LINK_SIZE 1
652    #elif LINK_SIZE == 3 || LINK_SIZE == 4
653    #undef LINK_SIZE
654    #define LINK_SIZE 2
655    #else
656    #error LINK_SIZE must be either 2, 3, or 4
657    #endif
658    
659    #undef IMM2_SIZE
660    #define IMM2_SIZE 1
661    
662    #endif /* SUPPORT_PCRE8 */
663    
664    static const pcre_uint16 OP_lengths16[] = { OP_LENGTHS };
665    #endif  /* SUPPORT_PCRE16 */
666    
667    /* If we have 8-bit support, default use_pcre16 to false; if there is also
668    16-bit support, it can be changed by an option. If there is no 8-bit support,
669    there must be 16-bit support, so default it to 1. */
670    
671    #ifdef SUPPORT_PCRE8
672    static int use_pcre16 = 0;
673    #else
674    static int use_pcre16 = 1;
675    #endif
676    
677    /* JIT study options for -s+n and /S+n where '1' <= n <= '7'. */
678    
679    static int jit_study_bits[] =
680      {
681      PCRE_STUDY_JIT_COMPILE,
682      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
683      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE,
684      PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
685      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
686      PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE + PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE,
687      PCRE_STUDY_JIT_COMPILE + PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE +
688        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE
689    };
690    
/* Textual explanations for runtime error codes. The number in front of each
entry is its position in the array. Judging by the first two slots (0 is "no
error", 1 is NOMATCH) the array is presumably indexed by the negated error
code - confirm against the code that reads it. A NULL slot belongs to a code
that is either handled specially elsewhere or never returned by the matching
functions. */

static const char *errtexts[] = {
  /*  0 */  NULL,      /* no error */
  /*  1 */  NULL,      /* NOMATCH is handled specially */
  /*  2 */  "NULL argument passed",
  /*  3 */  "bad option value",
  /*  4 */  "magic number missing",
  /*  5 */  "unknown opcode - pattern overwritten?",
  /*  6 */  "no more memory",
  /*  7 */  NULL,      /* never returned by pcre_exec() or pcre_dfa_exec() */
  /*  8 */  "match limit exceeded",
  /*  9 */  "callout error code",
  /* 10 */  NULL,      /* BADUTF8/16 is handled specially */
  /* 11 */  NULL,      /* BADUTF8/16 offset is handled specially */
  /* 12 */  NULL,      /* PARTIAL is handled specially */
  /* 13 */  "not used - internal error",
  /* 14 */  "internal error - pattern overwritten?",
  /* 15 */  "bad count value",
  /* 16 */  "item unsupported for DFA matching",
  /* 17 */  "backreference condition or recursion test not supported for DFA matching",
  /* 18 */  "match limit not supported for DFA matching",
  /* 19 */  "workspace size exceeded in DFA matching",
  /* 20 */  "too much recursion for DFA matching",
  /* 21 */  "recursion limit exceeded",
  /* 22 */  "not used - internal error",
  /* 23 */  "invalid combination of newline options",
  /* 24 */  "bad offset value",
  /* 25 */  NULL,      /* SHORTUTF8/16 is handled specially */
  /* 26 */  "nested recursion at the same subject position",
  /* 27 */  "JIT stack limit reached",
  /* 28 */  "pattern compiled in wrong mode: 8-bit/16-bit error"
};
724    
725    
726    /*************************************************
727    *         Alternate character tables             *
728    *************************************************/
729    
730    /* By default, the "tables" pointer when calling PCRE is set to NULL, thereby
731    using the default tables of the library. However, the T option can be used to
732    select alternate sets of tables, for different kinds of testing. Note also that
733    the L (locale) option also adjusts the tables. */
734    
735    /* This is the set of tables distributed as default with PCRE. It recognizes
736    only ASCII characters. */
737    
738    static const pcre_uint8 tables0[] = {
739    /* Layout: 256 lower casing bytes, 256 case flipping bytes, ten 32-byte class bit maps, 256 character type bytes. */
740    /* This table is a lower casing table. Entry n holds the lower case form of character n. */
741    
742        0,  1,  2,  3,  4,  5,  6,  7,
743        8,  9, 10, 11, 12, 13, 14, 15,
744       16, 17, 18, 19, 20, 21, 22, 23,
745       24, 25, 26, 27, 28, 29, 30, 31,
746       32, 33, 34, 35, 36, 37, 38, 39,
747       40, 41, 42, 43, 44, 45, 46, 47,
748       48, 49, 50, 51, 52, 53, 54, 55,
749       56, 57, 58, 59, 60, 61, 62, 63,
750       64, 97, 98, 99,100,101,102,103,
751      104,105,106,107,108,109,110,111,
752      112,113,114,115,116,117,118,119,
753      120,121,122, 91, 92, 93, 94, 95,
754       96, 97, 98, 99,100,101,102,103,
755      104,105,106,107,108,109,110,111,
756      112,113,114,115,116,117,118,119,
757      120,121,122,123,124,125,126,127,
758      128,129,130,131,132,133,134,135,
759      136,137,138,139,140,141,142,143,
760      144,145,146,147,148,149,150,151,
761      152,153,154,155,156,157,158,159,
762      160,161,162,163,164,165,166,167,
763      168,169,170,171,172,173,174,175,
764      176,177,178,179,180,181,182,183,
765      184,185,186,187,188,189,190,191,
766      192,193,194,195,196,197,198,199,
767      200,201,202,203,204,205,206,207,
768      208,209,210,211,212,213,214,215,
769      216,217,218,219,220,221,222,223,
770      224,225,226,227,228,229,230,231,
771      232,233,234,235,236,237,238,239,
772      240,241,242,243,244,245,246,247,
773      248,249,250,251,252,253,254,255,
774    
775    /* This table is a case flipping table. Entry n holds character n with its case swapped. */
776    
777        0,  1,  2,  3,  4,  5,  6,  7,
778        8,  9, 10, 11, 12, 13, 14, 15,
779       16, 17, 18, 19, 20, 21, 22, 23,
780       24, 25, 26, 27, 28, 29, 30, 31,
781       32, 33, 34, 35, 36, 37, 38, 39,
782       40, 41, 42, 43, 44, 45, 46, 47,
783       48, 49, 50, 51, 52, 53, 54, 55,
784       56, 57, 58, 59, 60, 61, 62, 63,
785       64, 97, 98, 99,100,101,102,103,
786      104,105,106,107,108,109,110,111,
787      112,113,114,115,116,117,118,119,
788      120,121,122, 91, 92, 93, 94, 95,
789       96, 65, 66, 67, 68, 69, 70, 71,
790       72, 73, 74, 75, 76, 77, 78, 79,
791       80, 81, 82, 83, 84, 85, 86, 87,
792       88, 89, 90,123,124,125,126,127,
793      128,129,130,131,132,133,134,135,
794      136,137,138,139,140,141,142,143,
795      144,145,146,147,148,149,150,151,
796      152,153,154,155,156,157,158,159,
797      160,161,162,163,164,165,166,167,
798      168,169,170,171,172,173,174,175,
799      176,177,178,179,180,181,182,183,
800      184,185,186,187,188,189,190,191,
801      192,193,194,195,196,197,198,199,
802      200,201,202,203,204,205,206,207,
803      208,209,210,211,212,213,214,215,
804      216,217,218,219,220,221,222,223,
805      224,225,226,227,228,229,230,231,
806      232,233,234,235,236,237,238,239,
807      240,241,242,243,244,245,246,247,
808      248,249,250,251,252,253,254,255,
809    
810    /* This table contains bit maps for various character classes. Each map is 32
811    bytes long and the bits run from the least significant end of each byte. The
812    classes that have their own maps are: space, xdigit, digit, upper, lower, word,
813    graph, print, punct, and cntrl. Other classes are built from combinations. */
814    
815      0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,  /* space: 9-13 and 32 */
816      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
817      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
818      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
819    
820      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* xdigit: 0-9, A-F, a-f */
821      0x7e,0x00,0x00,0x00,0x7e,0x00,0x00,0x00,
822      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
823      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
824    
825      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* digit: 0-9 */
826      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
827      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
828      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
829    
830      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* upper: A-Z */
831      0xfe,0xff,0xff,0x07,0x00,0x00,0x00,0x00,
832      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
833      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
834    
835      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,  /* lower: a-z */
836      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0x07,
837      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
838      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
839    
840      0x00,0x00,0x00,0x00,0x00,0x00,0xff,0x03,  /* word: 0-9, A-Z, '_', a-z */
841      0xfe,0xff,0xff,0x87,0xfe,0xff,0xff,0x07,
842      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
843      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
844    
845      0x00,0x00,0x00,0x00,0xfe,0xff,0xff,0xff,  /* graph: 33-126 */
846      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
847      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
848      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
849    
850      0x00,0x00,0x00,0x00,0xff,0xff,0xff,0xff,  /* print: 32-126 */
851      0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x7f,
852      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
853      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
854    
855      0x00,0x00,0x00,0x00,0xfe,0xff,0x00,0xfc,  /* punct: 33-47, 58-64, 91-96, 123-126 */
856      0x01,0x00,0x00,0xf8,0x01,0x00,0x00,0x78,
857      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
858      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
859    
860      0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,  /* cntrl: 0-31 and 127 */
861      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x80,
862      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
863      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
864    
865    /* This table identifies various classes of character by individual bits:
866      0x01   white space character
867      0x02   letter
868      0x04   decimal digit
869      0x08   hexadecimal digit
870      0x10   alphanumeric or '_'
871      0x80   regular expression metacharacter or binary zero
872    */
873    
874      0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
875      0x00,0x01,0x01,0x00,0x01,0x01,0x00,0x00, /*   8- 15 */
876      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  16- 23 */
877      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  24- 31 */
878      0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /*    - '  */
879      0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /*  ( - /  */
880      0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /*  0 - 7  */
881      0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /*  8 - ?  */
882      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  @ - G  */
883      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  H - O  */
884      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  P - W  */
885      0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /*  X - _  */
886      0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /*  ` - g  */
887      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  h - o  */
888      0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /*  p - w  */
889      0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /*  x -127 */
890      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
891      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
892      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */
893      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 152-159 */
894      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160-167 */
895      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 168-175 */
896      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 176-183 */
897      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */
898      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 192-199 */
899      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 200-207 */
900      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 208-215 */
901      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 216-223 */
902      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 224-231 */
903      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 232-239 */
904      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
905      0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
906    
907    /* This is a set of tables that came originally from a Windows user. It seems to
908    be at least an approximation of ISO 8859. In particular, there are characters
909    greater than 128 that are marked as spaces, letters, etc. */
910    /* NOTE(review): 1088 entries, the same count as tables0, so presumably the same four sections - confirm. */
911    static const pcre_uint8 tables1[] = {
912    0,1,2,3,4,5,6,7,  /* entries 0-255: lower casing table (presumed) */
913    8,9,10,11,12,13,14,15,
914    16,17,18,19,20,21,22,23,
915    24,25,26,27,28,29,30,31,
916    32,33,34,35,36,37,38,39,
917    40,41,42,43,44,45,46,47,
918    48,49,50,51,52,53,54,55,
919    56,57,58,59,60,61,62,63,
920    64,97,98,99,100,101,102,103,
921    104,105,106,107,108,109,110,111,
922    112,113,114,115,116,117,118,119,
923    120,121,122,91,92,93,94,95,
924    96,97,98,99,100,101,102,103,
925    104,105,106,107,108,109,110,111,
926    112,113,114,115,116,117,118,119,
927    120,121,122,123,124,125,126,127,
928    128,129,130,131,132,133,134,135,
929    136,137,138,139,140,141,142,143,
930    144,145,146,147,148,149,150,151,
931    152,153,154,155,156,157,158,159,
932    160,161,162,163,164,165,166,167,
933    168,169,170,171,172,173,174,175,
934    176,177,178,179,180,181,182,183,
935    184,185,186,187,188,189,190,191,
936    224,225,226,227,228,229,230,231,
937    232,233,234,235,236,237,238,239,
938    240,241,242,243,244,245,246,215,
939    248,249,250,251,252,253,254,223,
940    224,225,226,227,228,229,230,231,
941    232,233,234,235,236,237,238,239,
942    240,241,242,243,244,245,246,247,
943    248,249,250,251,252,253,254,255,
944    0,1,2,3,4,5,6,7,  /* entries 256-511: case flipping table (presumed) */
945    8,9,10,11,12,13,14,15,
946    16,17,18,19,20,21,22,23,
947    24,25,26,27,28,29,30,31,
948    32,33,34,35,36,37,38,39,
949    40,41,42,43,44,45,46,47,
950    48,49,50,51,52,53,54,55,
951    56,57,58,59,60,61,62,63,
952    64,97,98,99,100,101,102,103,
953    104,105,106,107,108,109,110,111,
954    112,113,114,115,116,117,118,119,
955    120,121,122,91,92,93,94,95,
956    96,65,66,67,68,69,70,71,
957    72,73,74,75,76,77,78,79,
958    80,81,82,83,84,85,86,87,
959    88,89,90,123,124,125,126,127,
960    128,129,130,131,132,133,134,135,
961    136,137,138,139,140,141,142,143,
962    144,145,146,147,148,149,150,151,
963    152,153,154,155,156,157,158,159,
964    160,161,162,163,164,165,166,167,
965    168,169,170,171,172,173,174,175,
966    176,177,178,179,180,181,182,183,
967    184,185,186,187,188,189,190,191,
968    224,225,226,227,228,229,230,231,
969    232,233,234,235,236,237,238,239,
970    240,241,242,243,244,245,246,215,
971    248,249,250,251,252,253,254,223,
972    192,193,194,195,196,197,198,199,
973    200,201,202,203,204,205,206,207,
974    208,209,210,211,212,213,214,247,
975    216,217,218,219,220,221,222,255,
976    0,62,0,0,1,0,0,0,  /* entries 512-831: ten 32-byte class bit maps (presumed) */
977    0,0,0,0,0,0,0,0,
978    32,0,0,0,1,0,0,0,
979    0,0,0,0,0,0,0,0,
980    0,0,0,0,0,0,255,3,
981    126,0,0,0,126,0,0,0,
982    0,0,0,0,0,0,0,0,
983    0,0,0,0,0,0,0,0,
984    0,0,0,0,0,0,255,3,
985    0,0,0,0,0,0,0,0,
986    0,0,0,0,0,0,12,2,
987    0,0,0,0,0,0,0,0,
988    0,0,0,0,0,0,0,0,
989    254,255,255,7,0,0,0,0,
990    0,0,0,0,0,0,0,0,
991    255,255,127,127,0,0,0,0,
992    0,0,0,0,0,0,0,0,
993    0,0,0,0,254,255,255,7,
994    0,0,0,0,0,4,32,4,
995    0,0,0,128,255,255,127,255,
996    0,0,0,0,0,0,255,3,
997    254,255,255,135,254,255,255,7,
998    0,0,0,0,0,4,44,6,
999    255,255,127,255,255,255,127,255,
1000    0,0,0,0,254,255,255,255,
1001    255,255,255,255,255,255,255,127,
1002    0,0,0,0,254,255,255,255,
1003    255,255,255,255,255,255,255,255,
1004    0,2,0,0,255,255,255,255,
1005    255,255,255,255,255,255,255,127,
1006    0,0,0,0,255,255,255,255,
1007    255,255,255,255,255,255,255,255,
1008    0,0,0,0,254,255,0,252,
1009    1,0,0,248,1,0,0,120,
1010    0,0,0,0,254,255,255,255,
1011    0,0,128,0,0,0,128,0,
1012    255,255,255,255,0,0,0,0,
1013    0,0,0,0,0,0,0,128,
1014    255,255,255,255,0,0,0,0,
1015    0,0,0,0,0,0,0,0,
1016    128,0,0,0,0,0,0,0,  /* entries 832-1087: character type table (presumed) */
1017    0,1,1,0,1,1,0,0,
1018    0,0,0,0,0,0,0,0,
1019    0,0,0,0,0,0,0,0,
1020    1,0,0,0,128,0,0,0,
1021    128,128,128,128,0,0,128,0,
1022    28,28,28,28,28,28,28,28,
1023    28,28,0,0,0,0,0,128,
1024    0,26,26,26,26,26,26,18,
1025    18,18,18,18,18,18,18,18,
1026    18,18,18,18,18,18,18,18,
1027    18,18,18,128,128,0,128,16,
1028    0,26,26,26,26,26,26,18,
1029    18,18,18,18,18,18,18,18,
1030    18,18,18,18,18,18,18,18,
1031    18,18,18,128,128,0,0,0,
1032    0,0,0,0,0,1,0,0,
1033    0,0,0,0,0,0,0,0,
1034    0,0,0,0,0,0,0,0,
1035    0,0,0,0,0,0,0,0,
1036    1,0,0,0,0,0,0,0,
1037    0,0,18,0,0,0,0,0,
1038    0,0,20,20,0,18,0,0,
1039    0,20,18,0,0,0,0,0,
1040    18,18,18,18,18,18,18,18,
1041    18,18,18,18,18,18,18,18,
1042    18,18,18,18,18,18,18,0,
1043    18,18,18,18,18,18,18,18,
1044    18,18,18,18,18,18,18,18,
1045    18,18,18,18,18,18,18,18,
1046    18,18,18,18,18,18,18,0,
1047    18,18,18,18,18,18,18,18
1048    };
1049    
1050    
1051    
1052    
#ifndef HAVE_STRERROR
/*************************************************
*   Fallback strerror() for non-ANSI libraries   *
*************************************************/

/* A few elderly systems (SunOS4 is one) have no strerror() in their
libraries, but they do provide the message table sys_errlist[] and its length
sys_nerr, which is all this replacement needs.

Argument:   n   an error number
Returns:        the entry for n in sys_errlist[], or a fixed "unknown" text
                when n lies outside 0 .. sys_nerr-1
*/

extern char *sys_errlist[];
extern int   sys_nerr;

char *
strerror(int n)
{
return (n >= 0 && n < sys_nerr)? sys_errlist[n] : "unknown error number";
}
#endif /* HAVE_STRERROR */
1072    
1073    
1074    /*************************************************
1075    *         JIT memory callback                    *
1076    *************************************************/
1077    
1078    static pcre_jit_stack* jit_callback(void *arg)
1079    {
1080    return (pcre_jit_stack *)arg;
1081    }
1082    
1083    
1084    #if !defined NOUTF || defined SUPPORT_PCRE16
1085    /*************************************************
1086    *            Convert UTF-8 string to value       *
1087    *************************************************/
1088    
1089    /* This function takes one or more bytes that represents a UTF-8 character,
1090    and returns the value of the character.
1091    
1092    Argument:
1093      utf8bytes   a pointer to the byte vector
1094      vptr        a pointer to an int to receive the value
1095    
1096    Returns:      >  0 => the number of bytes consumed
1097                  -6 to 0 => malformed UTF-8 character at offset = (-return)
1098    */
1099    
1100    static int
1101    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
1102    {
1103    int c = *utf8bytes++;
1104    int d = c;
1105    int i, j, s;
1106    
1107    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
1108      {
1109      if ((d & 0x80) == 0) break;
1110      d <<= 1;
1111      }
1112    
1113    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
1114    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
1115    
1116    /* i now has a value in the range 1-5 */
1117    
1118    s = 6*i;
1119    d = (c & utf8_table3[i]) << s;
1120    
1121    for (j = 0; j < i; j++)
1122      {
1123      c = *utf8bytes++;
1124      if ((c & 0xc0) != 0x80) return -(j+1);
1125      s -= 6;
1126      d |= (c & 0x3f) << s;
1127      }
1128    
1129    /* Check that encoding was the correct unique one */
1130    
1131    for (j = 0; j < utf8_table1_size; j++)
1132      if (d <= utf8_table1[j]) break;
1133    if (j != i) return -(i+1);
1134    
1135    /* Valid value */
1136    
1137    *vptr = d;
1138    return i+1;
1139    }
1140    #endif /* NOUTF || SUPPORT_PCRE16 */
1141    
1142    
1143    
1144    #if !defined NOUTF || defined SUPPORT_PCRE16
1145    /*************************************************
1146    *       Convert character value to UTF-8         *
1147    *************************************************/
1148    
1149    /* This function takes an integer value in the range 0 - 0x7fffffff
1150    and encodes it as a UTF-8 character in 0 to 6 bytes.
1151    
1152    Arguments:
1153      cvalue     the character value
1154      utf8bytes  pointer to buffer for result - at least 6 bytes long
1155    
1156    Returns:     number of characters placed in the buffer
1157    */
1158    
1159    static int
1160    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
1161    {
1162    register int i, j;
1163    for (i = 0; i < utf8_table1_size; i++)
1164      if (cvalue <= utf8_table1[i]) break;
1165    utf8bytes += i;
1166    for (j = i; j > 0; j--)
1167     {
1168     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
1169     cvalue >>= 6;
1170     }
1171    *utf8bytes = utf8_table2[i] | cvalue;
1172    return i + 1;
1173    }
1174    #endif
1175    
1176    
#ifdef SUPPORT_PCRE16
/*************************************************
*         Convert a string to 16-bit             *
*************************************************/

/* The result is always left in buffer16, which is replaced by a bigger block
whenever it holds fewer than 2*len + 2 bytes. That is always enough. A plain
copy turns each byte into one 16-bit item. When UTF-8 is decoded, a character
of one to three bytes becomes one 16-bit item and a four-byte character becomes
a pair of items, so no character needs more than twice its original size. The
remaining two bytes hold the terminating zero.

Note that this function does not object to surrogate values. This is
deliberate; it makes it possible to construct UTF-16 strings that are invalid,
for the purpose of testing that they are correctly faulted.

Patterns to be converted are either plain ASCII or UTF-8; data lines are always
in UTF-8 so that values greater than 255 can be handled.

Arguments:
  data       TRUE if converting a data line; FALSE for a regex
  p          points to a byte string
  utf        true if UTF-8 (to be converted to UTF-16)
  len        number of bytes in the string (excluding trailing zero)

Returns:     number of 16-bit data items used (excluding trailing zero)
             OR -1 if a UTF-8 string is malformed
             OR -2 if a value > 0x10ffff is encountered
             OR -3 if a value > 0xffff is encountered when not in UTF mode
*/

static int
to16(int data, pcre_uint8 *p, int utf, int len)
{
pcre_uint16 *out;
int needed = 2*len + 2;

if (buffer16_size < needed)
  {
  if (buffer16 != NULL) free(buffer16);
  buffer16_size = needed;
  buffer16 = (pcre_uint16 *)malloc(buffer16_size);
  if (buffer16 == NULL)
    {
    fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
    exit(1);
    }
  }

out = buffer16;

if (utf || data)
  {
  /* The input is UTF-8: decode one character at a time. */

  int value = 0;
  while (len > 0)
    {
    int used = utf82ord(p, &value);
    if (used <= 0) return -1;
    if (value > 0x10ffff) return -2;
    p += used;
    len -= used;

    if (value >= 0x10000)
      {
      /* Needs a surrogate pair, which is only meaningful in UTF mode. */

      if (!utf) return -3;
      value -= 0x10000;
      *out++ = 0xD800 | (value >> 10);
      *out++ = 0xDC00 | (value & 0x3ff);
      }
    else *out++ = value;
    }
  }

else
  {
  /* A non-UTF pattern: each byte is widened as it stands. */

  for (; len > 0; len--) *out++ = *p++;
  }

*out = 0;
return out - buffer16;
}
#endif
1255    
1256    
1257  /*************************************************  /*************************************************
# Line 173  optimal way of handling this, but hey, t Line 1270  optimal way of handling this, but hey, t
1270  Arguments:  Arguments:
1271    f            the file to read    f            the file to read
1272    start        where in buffer to start (this *must* be within buffer)    start        where in buffer to start (this *must* be within buffer)
1273      prompt       for stdin or readline()
1274    
1275  Returns:       pointer to the start of new data  Returns:       pointer to the start of new data
1276                 could be a copy of start, or could be moved                 could be a copy of start, or could be moved
1277                 NULL if no data read and EOF reached                 NULL if no data read and EOF reached
1278  */  */
1279    
1280  static uschar *  static pcre_uint8 *
1281  extend_inputline(FILE *f, uschar *start)  extend_inputline(FILE *f, pcre_uint8 *start, const char *prompt)
1282  {  {
1283  uschar *here = start;  pcre_uint8 *here = start;
1284    
1285  for (;;)  for (;;)
1286    {    {
1287    int rlen = buffer_size - (here - buffer);    size_t rlen = (size_t)(buffer_size - (here - buffer));
1288    
1289    if (rlen > 1000)    if (rlen > 1000)
1290      {      {
1291      int dlen;      int dlen;
1292      if (fgets((char *)here, rlen,  f) == NULL)  
1293        return (here == start)? NULL : start;      /* If libreadline support is required, use readline() to read a line if the
1294        input is a terminal. Note that readline() removes the trailing newline, so
1295        we must put it back again, to be compatible with fgets(). */
1296    
1297    #ifdef SUPPORT_LIBREADLINE
1298        if (isatty(fileno(f)))
1299          {
1300          size_t len;
1301          char *s = readline(prompt);
1302          if (s == NULL) return (here == start)? NULL : start;
1303          len = strlen(s);
1304          if (len > 0) add_history(s);
1305          if (len > rlen - 1) len = rlen - 1;
1306          memcpy(here, s, len);
1307          here[len] = '\n';
1308          here[len+1] = 0;
1309          free(s);
1310          }
1311        else
1312    #endif
1313    
1314        /* Read the next line by normal means, prompting if the file is stdin. */
1315    
1316          {
1317          if (f == stdin) printf("%s", prompt);
1318          if (fgets((char *)here, rlen,  f) == NULL)
1319            return (here == start)? NULL : start;
1320          }
1321    
1322      dlen = (int)strlen((char *)here);      dlen = (int)strlen((char *)here);
1323      if (dlen > 0 && here[dlen - 1] == '\n') return start;      if (dlen > 0 && here[dlen - 1] == '\n') return start;
1324      here += dlen;      here += dlen;
# Line 201  for (;;) Line 1327  for (;;)
1327    else    else
1328      {      {
1329      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
1330      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
1331      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1332      uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1333    
1334      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1335        {        {
# Line 234  return NULL; /* Control never gets here Line 1360  return NULL; /* Control never gets here
1360    
1361    
1362    
   
   
   
   
1363  /*************************************************  /*************************************************
1364  *          Read number from string               *  *          Read number from string               *
1365  *************************************************/  *************************************************/
# Line 254  Returns: the unsigned long Line 1376  Returns: the unsigned long
1376  */  */
1377    
1378  static int  static int
1379  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1380  {  {
1381  int result = 0;  int result = 0;
1382  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 265  return(result); Line 1387  return(result);
1387    
1388    
1389    
   
1390  /*************************************************  /*************************************************
1391  *            Convert UTF-8 string to value       *  *             Print one character                *
1392  *************************************************/  *************************************************/
1393    
1394  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
   
 Argument:  
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
1395    
1396  #if !defined NOUTF8  static int pchar(int c, FILE *f)
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1397  {  {
1398  int c = *utf8bytes++;  if (PRINTOK(c))
 int d = c;  
 int i, j, s;  
   
 for (i = -1; i < 6; i++)               /* i is number of additional bytes */  
1399    {    {
1400    if ((d & 0x80) == 0) break;    if (f != NULL) fprintf(f, "%c", c);
1401    d <<= 1;    return 1;
1402    }    }
1403    
1404  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (c < 0x100)
 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  
   
 /* i now has a value in the range 1-5 */  
   
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
   
 for (j = 0; j < i; j++)  
1405    {    {
1406    c = *utf8bytes++;    if (use_utf)
1407    if ((c & 0xc0) != 0x80) return -(j+1);      {
1408    s -= 6;      if (f != NULL) fprintf(f, "\\x{%02x}", c);
1409    d |= (c & 0x3f) << s;      return 6;
1410    }      }
1411      else
1412  /* Check that encoding was the correct unique one */      {
1413        if (f != NULL) fprintf(f, "\\x%02x", c);
1414  for (j = 0; j < utf8_table1_size; j++)      return 4;
1415    if (d <= utf8_table1[j]) break;      }
1416  if (j != i) return -(i+1);    }
   
 /* Valid value */  
   
 *vptr = d;  
 return i+1;  
 }  
   
 #endif  
   
   
   
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
 */  
   
 #if !defined NOUTF8  
1417    
1418  static int  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1419  ord2utf8(int cvalue, uschar *utf8bytes)  return (c <= 0x000000ff)? 6 :
1420  {         (c <= 0x00000fff)? 7 :
1421  register int i, j;         (c <= 0x0000ffff)? 8 :
1422  for (i = 0; i < utf8_table1_size; i++)         (c <= 0x000fffff)? 9 : 10;
   if (cvalue <= utf8_table1[i]) break;  
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1423  }  }
1424    
 #endif  
   
1425    
1426    
1427    #ifdef SUPPORT_PCRE8
1428  /*************************************************  /*************************************************
1429  *             Print character string             *  *         Print 8-bit character string           *
1430  *************************************************/  *************************************************/
1431    
1432  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1433  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1434    
1435  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(pcre_uint8 *p, int length, FILE *f)
1436  {  {
1437  int c = 0;  int c = 0;
1438  int yield = 0;  int yield = 0;
1439    
1440    if (length < 0)
1441      length = strlen((char *)p);
1442    
1443  while (length-- > 0)  while (length-- > 0)
1444    {    {
1445  #if !defined NOUTF8  #if !defined NOUTF
1446    if (use_utf8)    if (use_utf)
1447      {      {
1448      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
   
1449      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1450        {        {
1451        length -= rc - 1;        length -= rc - 1;
1452        p += rc;        p += rc;
1453        if (PRINTHEX(c))        yield += pchar(c, f);
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
1454        continue;        continue;
1455        }        }
1456      }      }
1457  #endif  #endif
1458      c = *p++;
1459      yield += pchar(c, f);
1460      }
1461    
1462     /* Not UTF-8, or malformed UTF-8  */  return yield;
1463    }
1464    #endif
1465    
1466    c = *p++;  
1467    if (PRINTHEX(c))  
1468      {  #ifdef SUPPORT_PCRE16
1469      if (f != NULL) fprintf(f, "%c", c);  /*************************************************
1470      yield++;  *    Find length of 0-terminated 16-bit string   *
1471      }  *************************************************/
1472    else  
1473    static int strlen16(PCRE_SPTR16 p)
1474    {
1475    int len = 0;
1476    while (*p++ != 0) len++;
1477    return len;
1478    }
1479    #endif  /* SUPPORT_PCRE16 */
1480    
1481    
1482    #ifdef SUPPORT_PCRE16
1483    /*************************************************
1484    *           Print 16-bit character string        *
1485    *************************************************/
1486    
1487    /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1488    If handed a NULL file, just counts chars without printing. */
1489    
1490    static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1491    {
1492    int yield = 0;
1493    
1494    if (length < 0)
1495      length = strlen16(p);
1496    
1497    while (length-- > 0)
1498      {
1499      int c = *p++ & 0xffff;
1500    #if !defined NOUTF
1501      if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1502      {      {
1503      if (f != NULL) fprintf(f, "\\x%02x", c);      int d = *p & 0xffff;
1504      yield += 4;      if (d >= 0xDC00 && d < 0xDFFF)
1505          {
1506          c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1507          length--;
1508          p++;
1509          }
1510      }      }
1511    #endif
1512      yield += pchar(c, f);
1513    }    }
1514    
1515  return yield;  return yield;
1516  }  }
1517    #endif  /* SUPPORT_PCRE16 */
1518    
1519    
1520    
1521    #ifdef SUPPORT_PCRE8
1522    /*************************************************
1523    *     Read a capture name (8-bit) and check it   *
1524    *************************************************/
1525    
1526    static pcre_uint8 *
1527    read_capture_name8(pcre_uint8 *p, pcre_uint8 **pp, pcre *re)
1528    {
1529    pcre_uint8 *npp = *pp;
1530    while (isalnum(*p)) *npp++ = *p++;
1531    *npp++ = 0;
1532    *npp = 0;
1533    if (pcre_get_stringnumber(re, (char *)(*pp)) < 0)
1534      {
1535      fprintf(outfile, "no parentheses with name \"");
1536      PCHARSV(*pp, 0, -1, outfile);
1537      fprintf(outfile, "\"\n");
1538      }
1539    
1540    *pp = npp;
1541    return p;
1542    }
1543    #endif  /* SUPPORT_PCRE8 */
1544    
1545    
1546    
1547    #ifdef SUPPORT_PCRE16
1548    /*************************************************
1549    *     Read a capture name (16-bit) and check it  *
1550    *************************************************/
1551    
1552    /* Note that the text being read is 8-bit. */
1553    
1554    static pcre_uint8 *
1555    read_capture_name16(pcre_uint8 *p, pcre_uint16 **pp, pcre *re)
1556    {
1557    pcre_uint16 *npp = *pp;
1558    while (isalnum(*p)) *npp++ = *p++;
1559    *npp++ = 0;
1560    *npp = 0;
1561    if (pcre16_get_stringnumber((pcre16 *)re, (PCRE_SPTR16)(*pp)) < 0)
1562      {
1563      fprintf(outfile, "no parentheses with name \"");
1564      PCHARSV(*pp, 0, -1, outfile);
1565      fprintf(outfile, "\"\n");
1566      }
1567    *pp = npp;
1568    return p;
1569    }
1570    #endif  /* SUPPORT_PCRE16 */
1571    
1572    
1573    
# Line 452  if (callout_extra) Line 1596  if (callout_extra)
1596      else      else
1597        {        {
1598        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1599        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject, cb->offset_vector[i],
1600          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1601        fprintf(f, "\n");        fprintf(f, "\n");
1602        }        }
# Line 465  printed lengths of the substrings. */ Line 1609  printed lengths of the substrings. */
1609    
1610  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1611    
1612  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, 0, cb->start_match, f);
1613  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject, cb->start_match,
1614    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1615    
1616  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, 0, cb->subject_length, NULL);
1617    
1618  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject, cb->current_position,
1619    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1620    
1621  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 508  fprintf(outfile, "%.*s", (cb->next_item_ Line 1652  fprintf(outfile, "%.*s", (cb->next_item_
1652  fprintf(outfile, "\n");  fprintf(outfile, "\n");
1653  first_callout = 0;  first_callout = 0;
1654    
1655    if (cb->mark != last_callout_mark)
1656      {
1657      if (cb->mark == NULL)
1658        fprintf(outfile, "Latest Mark: <unset>\n");
1659      else
1660        {
1661        fprintf(outfile, "Latest Mark: ");
1662        PCHARSV(cb->mark, 0, -1, outfile);
1663        putc('\n', outfile);
1664        }
1665      last_callout_mark = cb->mark;
1666      }
1667    
1668  if (cb->callout_data != NULL)  if (cb->callout_data != NULL)
1669    {    {
1670    int callout_data = *((int *)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
# Line 527  return (cb->callout_number != callout_fa Line 1684  return (cb->callout_number != callout_fa
1684  *            Local malloc functions              *  *            Local malloc functions              *
1685  *************************************************/  *************************************************/
1686    
1687  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and save the size of a
1688  compiled re. */  compiled re, which is the first store request that pcre_compile() makes. The
1689    show_malloc variable is set only during matching. */
1690    
1691  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
1692  {  {
1693  void *block = malloc(size);  void *block = malloc(size);
1694  gotten_store = size;  gotten_store = size;
1695    if (first_gotten_store == 0) first_gotten_store = size;
1696  if (show_malloc)  if (show_malloc)
1697    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
1698  return block;  return block;
# Line 546  if (show_malloc) Line 1705  if (show_malloc)
1705  free(block);  free(block);
1706  }  }
1707    
   
1708  /* For recursion malloc/free, to test stacking calls */  /* For recursion malloc/free, to test stacking calls */
1709    
1710  static void *stack_malloc(size_t size)  static void *stack_malloc(size_t size)
# Line 569  free(block); Line 1727  free(block);
1727  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1728  *************************************************/  *************************************************/
1729    
1730  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1731    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1732    value, but the code is defensive.
1733    
1734    Arguments:
1735      re        compiled regex
1736      study     study data
1737      option    PCRE_INFO_xxx option
1738      ptr       where to put the data
1739    
1740    Returns:    0 when OK, < 0 on error
1741    */
1742    
1743  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static int
1744    new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1745  {  {
1746  int rc;  int rc;
1747  if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)  
1748    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1749    #ifdef SUPPORT_PCRE16
1750      rc = pcre16_fullinfo((pcre16 *)re, (pcre16_extra *)study, option, ptr);
1751    #else
1752      rc = PCRE_ERROR_BADMODE;
1753    #endif
1754    else
1755    #ifdef SUPPORT_PCRE8
1756      rc = pcre_fullinfo(re, study, option, ptr);
1757    #else
1758      rc = PCRE_ERROR_BADMODE;
1759    #endif
1760    
1761    if (rc < 0)
1762      {
1763      fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1764        use_pcre16? "16" : "", option);
1765      if (rc == PCRE_ERROR_BADMODE)
1766        fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
1767          "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
1768      }
1769    
1770    return rc;
1771  }  }
1772    
1773    
1774    
1775  /*************************************************  /*************************************************
1776  *         Byte flipping function                 *  *             Swap byte functions                *
1777  *************************************************/  *************************************************/
1778    
1779  static unsigned long int  /* The following functions swap the bytes of a pcre_uint16 and pcre_uint32
1780  byteflip(unsigned long int value, int n)  value, respectively.
1781    
1782    Arguments:
1783      value        any number
1784    
1785    Returns:       the byte swapped value
1786    */
1787    
1788    static pcre_uint32
1789    swap_uint32(pcre_uint32 value)
1790  {  {
 if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  
1791  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
1792         ((value & 0x0000ff00) <<  8) |         ((value & 0x0000ff00) <<  8) |
1793         ((value & 0x00ff0000) >>  8) |         ((value & 0x00ff0000) >>  8) |
1794         ((value & 0xff000000) >> 24);         (value >> 24);
1795  }  }
1796    
1797    static pcre_uint16
1798    swap_uint16(pcre_uint16 value)
1799    {
1800    return (value >> 8) | (value << 8);
1801    }
1802    
1803    
1804    
1805    /*************************************************
1806    *        Flip bytes in a compiled pattern        *
1807    *************************************************/
1808    
1809    /* This function is called if the 'F' option was present on a pattern that is
1810    to be written to a file. We flip the bytes of all the integer fields in the
1811    regex data block and the study block. In 16-bit mode this also flips relevant
1812    bytes in the pattern itself. This is to make it possible to test PCRE's
1813    ability to reload byte-flipped patterns, e.g. those compiled on a different
1814    architecture. */
1815    
1816    static void
1817    regexflip(pcre *ere, pcre_extra *extra)
1818    {
1819    REAL_PCRE *re = (REAL_PCRE *)ere;
1820    #ifdef SUPPORT_PCRE16
1821    int op;
1822    pcre_uint16 *ptr = (pcre_uint16 *)re + re->name_table_offset;
1823    int length = re->name_count * re->name_entry_size;
1824    #ifdef SUPPORT_UTF
1825    BOOL utf = (re->options & PCRE_UTF16) != 0;
1826    BOOL utf16_char = FALSE;
1827    #endif /* SUPPORT_UTF */
1828    #endif /* SUPPORT_PCRE16 */
1829    
1830    /* Always flip the bytes in the main data block and study blocks. */
1831    
1832    re->magic_number = REVERSED_MAGIC_NUMBER;
1833    re->size = swap_uint32(re->size);
1834    re->options = swap_uint32(re->options);
1835    re->flags = swap_uint16(re->flags);
1836    re->top_bracket = swap_uint16(re->top_bracket);
1837    re->top_backref = swap_uint16(re->top_backref);
1838    re->first_char = swap_uint16(re->first_char);
1839    re->req_char = swap_uint16(re->req_char);
1840    re->name_table_offset = swap_uint16(re->name_table_offset);
1841    re->name_entry_size = swap_uint16(re->name_entry_size);
1842    re->name_count = swap_uint16(re->name_count);
1843    
1844    if (extra != NULL)
1845      {
1846      pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1847      rsd->size = swap_uint32(rsd->size);
1848      rsd->flags = swap_uint32(rsd->flags);
1849      rsd->minlength = swap_uint32(rsd->minlength);
1850      }
1851    
1852    /* In 8-bit mode, that is all we need to do. In 16-bit mode we must swap bytes
1853    in the name table, if present, and then in the pattern itself. */
1854    
1855    #ifdef SUPPORT_PCRE16
1856    if (!use_pcre16) return;
1857    
1858    while(TRUE)
1859      {
1860      /* Swap previous characters. */
1861      while (length-- > 0)
1862        {
1863        *ptr = swap_uint16(*ptr);
1864        ptr++;
1865        }
1866    #ifdef SUPPORT_UTF
1867      if (utf16_char)
1868        {
1869        if ((ptr[-1] & 0xfc00) == 0xd800)
1870          {
1871          /* We know that there is only one extra character in UTF-16. */
1872          *ptr = swap_uint16(*ptr);
1873          ptr++;
1874          }
1875        }
1876      utf16_char = FALSE;
1877    #endif /* SUPPORT_UTF */
1878    
1879      /* Get next opcode. */
1880    
1881      length = 0;
1882      op = *ptr;
1883      *ptr++ = swap_uint16(op);
1884    
1885      switch (op)
1886        {
1887        case OP_END:
1888        return;
1889    
1890    #ifdef SUPPORT_UTF
1891        case OP_CHAR:
1892        case OP_CHARI:
1893        case OP_NOT:
1894        case OP_NOTI:
1895        case OP_STAR:
1896        case OP_MINSTAR:
1897        case OP_PLUS:
1898        case OP_MINPLUS:
1899        case OP_QUERY:
1900        case OP_MINQUERY:
1901        case OP_UPTO:
1902        case OP_MINUPTO:
1903        case OP_EXACT:
1904        case OP_POSSTAR:
1905        case OP_POSPLUS:
1906        case OP_POSQUERY:
1907        case OP_POSUPTO:
1908        case OP_STARI:
1909        case OP_MINSTARI:
1910        case OP_PLUSI:
1911        case OP_MINPLUSI:
1912        case OP_QUERYI:
1913        case OP_MINQUERYI:
1914        case OP_UPTOI:
1915        case OP_MINUPTOI:
1916        case OP_EXACTI:
1917        case OP_POSSTARI:
1918        case OP_POSPLUSI:
1919        case OP_POSQUERYI:
1920        case OP_POSUPTOI:
1921        case OP_NOTSTAR:
1922        case OP_NOTMINSTAR:
1923        case OP_NOTPLUS:
1924        case OP_NOTMINPLUS:
1925        case OP_NOTQUERY:
1926        case OP_NOTMINQUERY:
1927        case OP_NOTUPTO:
1928        case OP_NOTMINUPTO:
1929        case OP_NOTEXACT:
1930        case OP_NOTPOSSTAR:
1931        case OP_NOTPOSPLUS:
1932        case OP_NOTPOSQUERY:
1933        case OP_NOTPOSUPTO:
1934        case OP_NOTSTARI:
1935        case OP_NOTMINSTARI:
1936        case OP_NOTPLUSI:
1937        case OP_NOTMINPLUSI:
1938        case OP_NOTQUERYI:
1939        case OP_NOTMINQUERYI:
1940        case OP_NOTUPTOI:
1941        case OP_NOTMINUPTOI:
1942        case OP_NOTEXACTI:
1943        case OP_NOTPOSSTARI:
1944        case OP_NOTPOSPLUSI:
1945        case OP_NOTPOSQUERYI:
1946        case OP_NOTPOSUPTOI:
1947        if (utf) utf16_char = TRUE;
1948    #endif
1949        /* Fall through. */
1950    
1951        default:
1952        length = OP_lengths16[op] - 1;
1953        break;
1954    
1955        case OP_CLASS:
1956        case OP_NCLASS:
1957        /* Skip the character bit map. */
1958        ptr += 32/sizeof(pcre_uint16);
1959        length = 0;
1960        break;
1961    
1962        case OP_XCLASS:
1963        /* LINK_SIZE can be 1 or 2 in 16 bit mode. */
1964        if (LINK_SIZE > 1)
1965          length = (int)((((unsigned int)(ptr[0]) << 16) | (unsigned int)(ptr[1]))
1966            - (1 + LINK_SIZE + 1));
1967        else
1968          length = (int)((unsigned int)(ptr[0]) - (1 + LINK_SIZE + 1));
1969    
1970        /* Reverse the size of the XCLASS instance. */
1971        *ptr = swap_uint16(*ptr);
1972        ptr++;
1973        if (LINK_SIZE > 1)
1974          {
1975          *ptr = swap_uint16(*ptr);
1976          ptr++;
1977          }
1978    
1979        op = *ptr;
1980        *ptr = swap_uint16(op);
1981        ptr++;
1982        if ((op & XCL_MAP) != 0)
1983          {
1984          /* Skip the character bit map. */
1985          ptr += 32/sizeof(pcre_uint16);
1986          length -= 32/sizeof(pcre_uint16);
1987          }
1988        break;
1989        }
1990      }
1991    /* Control should never reach here in 16 bit mode. */
1992    #endif /* SUPPORT_PCRE16 */
1993    }
1994    
1995    
1996    
# Line 602  return ((value & 0x000000ff) << 24) | Line 1999  return ((value & 0x000000ff) << 24) |
1999  *************************************************/  *************************************************/
2000    
2001  static int  static int
2002  check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,  check_match_limit(pcre *re, pcre_extra *extra, pcre_uint8 *bptr, int len,
2003    int start_offset, int options, int *use_offsets, int use_size_offsets,    int start_offset, int options, int *use_offsets, int use_size_offsets,
2004    int flag, unsigned long int *limit, int errnumber, const char *msg)    int flag, unsigned long int *limit, int errnumber, const char *msg)
2005  {  {
# Line 617  for (;;) Line 2014  for (;;)
2014    {    {
2015    *limit = mid;    *limit = mid;
2016    
2017    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
2018      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
2019    
2020    if (count == errnumber)    if (count == errnumber)
# Line 649  return count; Line 2046  return count;
2046    
2047    
2048  /*************************************************  /*************************************************
2049    *         Case-independent strncmp() function    *
2050    *************************************************/
2051    
2052    /*
2053    Arguments:
2054      s         first string
2055      t         second string
2056      n         number of characters to compare
2057    
2058    Returns:    < 0, = 0, or > 0, according to the comparison
2059    */
2060    
2061    static int
2062    strncmpic(pcre_uint8 *s, pcre_uint8 *t, int n)
2063    {
2064    while (n--)
2065      {
2066      int c = tolower(*s++) - tolower(*t++);
2067      if (c) return c;
2068      }
2069    return 0;
2070    }
2071    
2072    
2073    
2074    /*************************************************
2075  *         Check newline indicator                *  *         Check newline indicator                *
2076  *************************************************/  *************************************************/
2077    
2078  /* This is used both at compile and run-time to check for <xxx> escapes, where  /* This is used both at compile and run-time to check for <xxx> escapes. Print
2079  xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.  a message and return 0 if there is no match.
2080    
2081  Arguments:  Arguments:
2082    p           points after the leading '<'    p           points after the leading '<'
# Line 663  Returns: appropriate PCRE_NEWLINE_x Line 2086  Returns: appropriate PCRE_NEWLINE_x
2086  */  */
2087    
2088  static int  static int
2089  check_newline(uschar *p, FILE *f)  check_newline(pcre_uint8 *p, FILE *f)
2090  {  {
2091  if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;  if (strncmpic(p, (pcre_uint8 *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
2092  if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;  if (strncmpic(p, (pcre_uint8 *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
2093  if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;  if (strncmpic(p, (pcre_uint8 *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
2094  if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;  if (strncmpic(p, (pcre_uint8 *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
2095    if (strncmpic(p, (pcre_uint8 *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
2096    if (strncmpic(p, (pcre_uint8 *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
2097    if (strncmpic(p, (pcre_uint8 *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
2098  fprintf(f, "Unknown newline type at: <%s\n", p);  fprintf(f, "Unknown newline type at: <%s\n", p);
2099  return 0;  return 0;
2100  }  }
# Line 682  return 0; Line 2108  return 0;
2108  static void  static void
2109  usage(void)  usage(void)
2110  {  {
2111  printf("Usage:     pcretest [options] [<input> [<output>]]\n");  printf("Usage:     pcretest [options] [<input file> [<output file>]]\n\n");
2112  printf("  -b       show compiled code (bytecode)\n");  printf("Input and output default to stdin and stdout.\n");
2113    #ifdef SUPPORT_LIBREADLINE
2114    printf("If input is a terminal, readline() is used to read from it.\n");
2115    #else
2116    printf("This version of pcretest is not linked with readline().\n");
2117    #endif
2118    printf("\nOptions:\n");
2119    #ifdef SUPPORT_PCRE16
2120    printf("  -16      use the 16-bit library\n");
2121    #endif
2122    printf("  -b       show compiled code\n");
2123  printf("  -C       show PCRE compile-time options and exit\n");  printf("  -C       show PCRE compile-time options and exit\n");
2124    printf("  -C arg   show a specific compile-time option\n");
2125    printf("           and exit with its value. The arg can be:\n");
2126    printf("     linksize     internal link size [2, 3, 4]\n");
2127    printf("     pcre8        8 bit library support enabled [0, 1]\n");
2128    printf("     pcre16       16 bit library support enabled [0, 1]\n");
2129    printf("     utf          Unicode Transformation Format supported [0, 1]\n");
2130    printf("     ucp          Unicode Properties supported [0, 1]\n");
2131    printf("     jit          Just-in-time compiler supported [0, 1]\n");
2132    printf("     newline      Newline type [CR, LF, CRLF, ANYCRLF, ANY, ???]\n");
2133  printf("  -d       debug: show compiled code and information (-b and -i)\n");  printf("  -d       debug: show compiled code and information (-b and -i)\n");
2134  #if !defined NODFA  #if !defined NODFA
2135  printf("  -dfa     force DFA matching for all subjects\n");  printf("  -dfa     force DFA matching for all subjects\n");
2136  #endif  #endif
2137  printf("  -help    show usage information\n");  printf("  -help    show usage information\n");
2138  printf("  -i       show information about compiled patterns\n"  printf("  -i       show information about compiled patterns\n"
2139           "  -M       find MATCH_LIMIT minimum for each subject\n"
2140         "  -m       output memory used information\n"         "  -m       output memory used information\n"
2141         "  -o <n>   set size of offsets vector to <n>\n");         "  -o <n>   set size of offsets vector to <n>\n");
2142  #if !defined NOPOSIX  #if !defined NOPOSIX
# Line 698  printf(" -p use POSIX interface\n Line 2144  printf(" -p use POSIX interface\n
2144  #endif  #endif
2145  printf("  -q       quiet: do not output PCRE version number at start\n");  printf("  -q       quiet: do not output PCRE version number at start\n");
2146  printf("  -S <n>   set stack size to <n> megabytes\n");  printf("  -S <n>   set stack size to <n> megabytes\n");
2147  printf("  -s       output store (memory) used information\n"  printf("  -s       force each pattern to be studied at basic level\n"
2148           "  -s+      force each pattern to be studied, using JIT if available\n"
2149           "  -s++     ditto, verifying when JIT was actually used\n"
2150           "  -s+n     force each pattern to be studied, using JIT if available,\n"
2151           "             where 1 <= n <= 7 selects JIT options\n"
2152           "  -s++n    ditto, verifying when JIT was actually used\n"
2153         "  -t       time compilation and execution\n");         "  -t       time compilation and execution\n");
2154  printf("  -t <n>   time compilation and execution, repeating <n> times\n");  printf("  -t <n>   time compilation and execution, repeating <n> times\n");
2155  printf("  -tm      time execution (matching) only\n");  printf("  -tm      time execution (matching) only\n");
# Line 718  options, followed by a set of test data, Line 2169  options, followed by a set of test data,
2169  int main(int argc, char **argv)  int main(int argc, char **argv)
2170  {  {
2171  FILE *infile = stdin;  FILE *infile = stdin;
2172    const char *version;
2173  int options = 0;  int options = 0;
2174  int study_options = 0;  int study_options = 0;
2175    int default_find_match_limit = FALSE;
2176  int op = 1;  int op = 1;
2177  int timeit = 0;  int timeit = 0;
2178  int timeitm = 0;  int timeitm = 0;
2179  int showinfo = 0;  int showinfo = 0;
2180  int showstore = 0;  int showstore = 0;
2181    int force_study = -1;
2182    int force_study_options = 0;
2183  int quiet = 0;  int quiet = 0;
2184  int size_offsets = 45;  int size_offsets = 45;
2185  int size_offsets_max;  int size_offsets_max;
# Line 735  int posix = 0; Line 2190  int posix = 0;
2190  int debug = 0;  int debug = 0;
2191  int done = 0;  int done = 0;
2192  int all_use_dfa = 0;  int all_use_dfa = 0;
2193    int verify_jit = 0;
2194  int yield = 0;  int yield = 0;
2195  int stack_size;  int stack_size;
2196    
2197  /* These vectors store, end-to-end, a list of captured substring names. Assume  pcre_jit_stack *jit_stack = NULL;
 that 1024 is plenty long enough for the few names we'll be testing. */  
2198    
2199  uschar copynames[1024];  /* These vectors store, end-to-end, a list of zero-terminated captured
2200  uschar getnames[1024];  substring names, each list itself being terminated by an empty name. Assume
2201    that 1024 is plenty long enough for the few names we'll be testing. It is
2202  uschar *copynamesptr;  easiest to keep separate 8-bit and 16-bit versions, using the 16-bit version
2203  uschar *getnamesptr;  for the actual memory, to ensure alignment. */
2204    
2205    pcre_uint16 copynames[1024];
2206    pcre_uint16 getnames[1024];
2207    
2208    #ifdef SUPPORT_PCRE16
2209    pcre_uint16 *cn16ptr;
2210    pcre_uint16 *gn16ptr;
2211    #endif
2212    
2213  /* Get buffers from malloc() so that Electric Fence will check their misuse  #ifdef SUPPORT_PCRE8
2214  when I am debugging. They grow automatically when very long lines are read. */  pcre_uint8 *copynames8 = (pcre_uint8 *)copynames;
2215    pcre_uint8 *getnames8 = (pcre_uint8 *)getnames;
2216    pcre_uint8 *cn8ptr;
2217    pcre_uint8 *gn8ptr;
2218    #endif
2219    
2220  buffer = (unsigned char *)malloc(buffer_size);  /* Get buffers from malloc() so that valgrind will check their misuse when
2221  dbuffer = (unsigned char *)malloc(buffer_size);  debugging. They grow automatically when very long lines are read. The 16-bit
2222  pbuffer = (unsigned char *)malloc(buffer_size);  buffer (buffer16) is obtained only if needed. */
2223    
2224    buffer = (pcre_uint8 *)malloc(buffer_size);
2225    dbuffer = (pcre_uint8 *)malloc(buffer_size);
2226    pbuffer = (pcre_uint8 *)malloc(buffer_size);
2227    
2228  /* The outfile variable is static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
2229    
# Line 767  it set 0x8000, but then I was advised th Line 2238  it set 0x8000, but then I was advised th
2238  _setmode( _fileno( stdout ), _O_BINARY );  _setmode( _fileno( stdout ), _O_BINARY );
2239  #endif  #endif
2240    
2241    /* Get the version number: both pcre_version() and pcre16_version() give the
2242    same answer. We just need to ensure that we call one that is available. */
2243    
2244    #ifdef SUPPORT_PCRE8
2245    version = pcre_version();
2246    #else
2247    version = pcre16_version();
2248    #endif
2249    
2250  /* Scan options */  /* Scan options */
2251    
2252  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
2253    {    {
2254    unsigned char *endptr;    pcre_uint8 *endptr;
2255      char *arg = argv[op];
2256    
2257    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(arg, "-m") == 0) showstore = 1;
2258      showstore = 1;    else if (strcmp(arg, "-s") == 0) force_study = 0;
2259    else if (strcmp(argv[op], "-q") == 0) quiet = 1;  
2260    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strncmp(arg, "-s+", 3) == 0)
2261    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;      {
2262    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;      arg += 3;
2263        if (*arg == '+') { arg++; verify_jit = TRUE; }
2264        force_study = 1;
2265        if (*arg == 0)
2266          force_study_options = jit_study_bits[6];
2267        else if (*arg >= '1' && *arg <= '7')
2268          force_study_options = jit_study_bits[*arg - '1'];
2269        else goto BAD_ARG;
2270        }
2271      else if (strcmp(arg, "-16") == 0)
2272        {
2273    #ifdef SUPPORT_PCRE16
2274        use_pcre16 = 1;
2275    #else
2276        printf("** This version of PCRE was built without 16-bit support\n");
2277        exit(1);
2278    #endif
2279        }
2280      else if (strcmp(arg, "-q") == 0) quiet = 1;
2281      else if (strcmp(arg, "-b") == 0) debug = 1;
2282      else if (strcmp(arg, "-i") == 0) showinfo = 1;
2283      else if (strcmp(arg, "-d") == 0) showinfo = debug = 1;
2284      else if (strcmp(arg, "-M") == 0) default_find_match_limit = TRUE;
2285  #if !defined NODFA  #if !defined NODFA
2286    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(arg, "-dfa") == 0) all_use_dfa = 1;
2287  #endif  #endif
2288    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(arg, "-o") == 0 && argc > 2 &&
2289        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2290          *endptr == 0))          *endptr == 0))
2291      {      {
2292      op++;      op++;
2293      argc--;      argc--;
2294      }      }
2295    else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)    else if (strcmp(arg, "-t") == 0 || strcmp(arg, "-tm") == 0)
2296      {      {
2297      int both = argv[op][2] == 0;      int both = arg[2] == 0;
2298      int temp;      int temp;
2299      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
2300                       *endptr == 0))                       *endptr == 0))
2301        {        {
2302        timeitm = temp;        timeitm = temp;
# Line 803  while (argc > 1 && argv[op][0] == '-') Line 2306  while (argc > 1 && argv[op][0] == '-')
2306      else timeitm = LOOPREPEAT;      else timeitm = LOOPREPEAT;
2307      if (both) timeit = timeitm;      if (both) timeit = timeitm;
2308      }      }
2309    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(arg, "-S") == 0 && argc > 2 &&
2310        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
2311          *endptr == 0))          *endptr == 0))
2312      {      {
2313  #if defined(_WIN32) || defined(WIN32)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
2314      printf("PCRE: -S not supported on this OS\n");      printf("PCRE: -S not supported on this OS\n");
2315      exit(1);      exit(1);
2316  #else  #else
# Line 826  while (argc > 1 && argv[op][0] == '-') Line 2329  while (argc > 1 && argv[op][0] == '-')
2329  #endif  #endif
2330      }      }
2331  #if !defined NOPOSIX  #if !defined NOPOSIX
2332    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(arg, "-p") == 0) posix = 1;
2333  #endif  #endif
2334    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(arg, "-C") == 0)
2335      {      {
2336      int rc;      int rc;
2337      printf("PCRE version %s\n", pcre_version());      unsigned long int lrc;
2338    
2339        if (argc > 2)
2340          {
2341          if (strcmp(argv[op + 1], "linksize") == 0)
2342            {
2343            (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2344            printf("%d\n", rc);
2345            yield = rc;
2346            goto EXIT;
2347            }
2348          if (strcmp(argv[op + 1], "pcre8") == 0)
2349            {
2350    #ifdef SUPPORT_PCRE8
2351            printf("1\n");
2352            yield = 1;
2353    #else
2354            printf("0\n");
2355            yield = 0;
2356    #endif
2357            goto EXIT;
2358            }
2359          if (strcmp(argv[op + 1], "pcre16") == 0)
2360            {
2361    #ifdef SUPPORT_PCRE16
2362            printf("1\n");
2363            yield = 1;
2364    #else
2365            printf("0\n");
2366            yield = 0;
2367    #endif
2368            goto EXIT;
2369            }
2370          if (strcmp(argv[op + 1], "utf") == 0)
2371            {
2372    #ifdef SUPPORT_PCRE8
2373            (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2374            printf("%d\n", rc);
2375            yield = rc;
2376    #else
2377            (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2378            printf("%d\n", rc);
2379            yield = rc;
2380    #endif
2381            goto EXIT;
2382            }
2383          if (strcmp(argv[op + 1], "ucp") == 0)
2384            {
2385            (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2386            printf("%d\n", rc);
2387            yield = rc;
2388            goto EXIT;
2389            }
2390          if (strcmp(argv[op + 1], "jit") == 0)
2391            {
2392            (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2393            printf("%d\n", rc);
2394            yield = rc;
2395            goto EXIT;
2396            }
2397          if (strcmp(argv[op + 1], "newline") == 0)
2398            {
2399            (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2400            /* Note that these values are always the ASCII values, even
2401            in EBCDIC environments. CR is 13 and NL is 10. */
2402            printf("%s\n", (rc == 13)? "CR" :
2403              (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2404              (rc == -2)? "ANYCRLF" :
2405              (rc == -1)? "ANY" : "???");
2406            goto EXIT;
2407            }
2408          printf("Unknown -C option: %s\n", argv[op + 1]);
2409          goto EXIT;
2410          }
2411    
2412        printf("PCRE version %s\n", version);
2413      printf("Compiled with\n");      printf("Compiled with\n");
2414    
2415    /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
2416    are set, either both UTFs are supported or both are not supported. */
2417    
2418    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2419        printf("  8-bit and 16-bit support\n");
2420        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2421        if (rc)
2422          printf("  UTF-8 and UTF-16 support\n");
2423        else
2424          printf("  No UTF-8 or UTF-16 support\n");
2425    #elif defined SUPPORT_PCRE8
2426        printf("  8-bit support only\n");
2427      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
2428      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
2429      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);  #else
2430        printf("  16-bit support only\n");
2431        (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
2432        printf("  %sUTF-16 support\n", rc? "" : "No ");
2433    #endif
2434    
2435        (void)PCRE_CONFIG(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
2436      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
2437      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_JIT, &rc);
2438      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :      if (rc)
2439        (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :        {
2440          const char *arch;
2441          (void)PCRE_CONFIG(PCRE_CONFIG_JITTARGET, (void *)(&arch));
2442          printf("  Just-in-time compiler support: %s\n", arch);
2443          }
2444        else
2445          printf("  No just-in-time compiler support\n");
2446        (void)PCRE_CONFIG(PCRE_CONFIG_NEWLINE, &rc);
2447        /* Note that these values are always the ASCII values, even
2448        in EBCDIC environments. CR is 13 and NL is 10. */
2449        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
2450          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
2451          (rc == -2)? "ANYCRLF" :
2452        (rc == -1)? "ANY" : "???");        (rc == -1)? "ANY" : "???");
2453      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_BSR, &rc);
2454        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
2455                                         "all Unicode newlines");
2456        (void)PCRE_CONFIG(PCRE_CONFIG_LINK_SIZE, &rc);
2457      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
2458      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
2459      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
2460      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT, &lrc);
2461      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
2462      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
2463      printf("  Default recursion depth limit = %d\n", rc);      printf("  Default recursion depth limit = %ld\n", lrc);
2464      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)PCRE_CONFIG(PCRE_CONFIG_STACKRECURSE, &rc);
2465      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s", rc? "stack" : "heap");
2466      exit(0);      if (showstore)
2467          {
2468          PCRE_EXEC(stack_size, NULL, NULL, NULL, -999, -999, 0, NULL, 0);
2469          printf(": %sframe size = %d bytes", rc? "approximate " : "", -stack_size);
2470          }
2471        printf("\n");
2472        goto EXIT;
2473      }      }
2474    else if (strcmp(argv[op], "-help") == 0 ||    else if (strcmp(arg, "-help") == 0 ||
2475             strcmp(argv[op], "--help") == 0)             strcmp(arg, "--help") == 0)
2476      {      {
2477      usage();      usage();
2478      goto EXIT;      goto EXIT;
2479      }      }
2480    else    else
2481      {      {
2482      printf("** Unknown or malformed option %s\n", argv[op]);      BAD_ARG:
2483        printf("** Unknown or malformed option %s\n", arg);
2484      usage();      usage();
2485      yield = 1;      yield = 1;
2486      goto EXIT;      goto EXIT;
# Line 877  offsets = (int *)malloc(size_offsets_max Line 2496  offsets = (int *)malloc(size_offsets_max
2496  if (offsets == NULL)  if (offsets == NULL)
2497    {    {
2498    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
2499      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
2500    yield = 1;    yield = 1;
2501    goto EXIT;    goto EXIT;
2502    }    }
# Line 908  if (argc > 2) Line 2527  if (argc > 2)
2527    
2528  /* Set alternative malloc function */  /* Set alternative malloc function */
2529    
2530    #ifdef SUPPORT_PCRE8
2531  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
2532  pcre_free = new_free;  pcre_free = new_free;
2533  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
2534  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
2535    #endif
2536    
2537    #ifdef SUPPORT_PCRE16
2538    pcre16_malloc = new_malloc;
2539    pcre16_free = new_free;
2540    pcre16_stack_malloc = stack_malloc;
2541    pcre16_stack_free = stack_free;
2542    #endif
2543    
2544  /* Heading line unless quiet, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
2545    
2546  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", version);
2547    
2548  /* Main loop */  /* Main loop */
2549    
# Line 930  while (!done) Line 2558  while (!done)
2558  #endif  #endif
2559    
2560    const char *error;    const char *error;
2561    unsigned char *p, *pp, *ppp;    pcre_uint8 *markptr;
2562    unsigned char *to_file = NULL;    pcre_uint8 *p, *pp, *ppp;
2563    const unsigned char *tables = NULL;    pcre_uint8 *to_file = NULL;
2564      const pcre_uint8 *tables = NULL;
2565      unsigned long int get_options;
2566    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
2567    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
2568      int do_allcaps = 0;
2569      int do_mark = 0;
2570    int do_study = 0;    int do_study = 0;
2571      int no_force_study = 0;
2572    int do_debug = debug;    int do_debug = debug;
2573    int do_G = 0;    int do_G = 0;
2574    int do_g = 0;    int do_g = 0;
2575    int do_showinfo = showinfo;    int do_showinfo = showinfo;
2576    int do_showrest = 0;    int do_showrest = 0;
2577      int do_showcaprest = 0;
2578    int do_flip = 0;    int do_flip = 0;
2579    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
2580    
2581    use_utf8 = 0;    use_utf = 0;
2582      debug_lengths = 1;
2583    
2584    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (extend_inputline(infile, buffer) == NULL) break;  
2585    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
2586    fflush(outfile);    fflush(outfile);
2587    
# Line 959  while (!done) Line 2593  while (!done)
2593    
2594    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
2595      {      {
2596      unsigned long int magic, get_options;      pcre_uint32 magic;
2597      uschar sbuf[8];      pcre_uint8 sbuf[8];
2598      FILE *f;      FILE *f;
2599    
2600      p++;      p++;
2601        if (*p == '!')
2602          {
2603          do_debug = TRUE;
2604          do_showinfo = TRUE;
2605          p++;
2606          }
2607    
2608      pp = p + (int)strlen((char *)p);      pp = p + (int)strlen((char *)p);
2609      while (isspace(pp[-1])) pp--;      while (isspace(pp[-1])) pp--;
2610      *pp = 0;      *pp = 0;
# Line 975  while (!done) Line 2616  while (!done)
2616        continue;        continue;
2617        }        }
2618    
2619        first_gotten_store = 0;
2620      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;      if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
2621    
2622      true_size =      true_size =
# Line 982  while (!done) Line 2624  while (!done)
2624      true_study_size =      true_study_size =
2625        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];        (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
2626    
2627      re = (real_pcre *)new_malloc(true_size);      re = (pcre *)new_malloc(true_size);
2628      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
2629    
2630      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;      if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
2631    
2632      magic = ((real_pcre *)re)->magic_number;      magic = ((REAL_PCRE *)re)->magic_number;
2633      if (magic != MAGIC_NUMBER)      if (magic != MAGIC_NUMBER)
2634        {        {
2635        if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)        if (swap_uint32(magic) == MAGIC_NUMBER)
2636          {          {
2637          do_flip = 1;          do_flip = 1;
2638          }          }
# Line 1002  while (!done) Line 2644  while (!done)
2644          }          }
2645        }        }
2646    
2647      fprintf(outfile, "Compiled regex%s loaded from %s\n",      /* We hide the byte-invert info for little and big endian tests. */
2648        do_flip? " (byte-inverted)" : "", p);      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
2649          do_flip && (p[-1] == '<') ? " (byte-inverted)" : "", p);
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
2650    
2651      /* Now see if there is any following study data */      /* Now see if there is any following study data. */
2652    
2653      if (true_study_size != 0)      if (true_study_size != 0)
2654        {        {
# Line 1026  while (!done) Line 2664  while (!done)
2664          {          {
2665          FAIL_READ:          FAIL_READ:
2666          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
2667          if (extra != NULL) new_free(extra);          if (extra != NULL)
2668              {
2669              PCRE_FREE_STUDY(extra);
2670              }
2671          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
2672          fclose(f);          fclose(f);
2673          continue;          continue;
# Line 1036  while (!done) Line 2677  while (!done)
2677        }        }
2678      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
2679    
2680        /* Flip the necessary bytes. */
2681        if (do_flip)
2682          {
2683          int rc;
2684          PCRE_PATTERN_TO_HOST_BYTE_ORDER(rc, re, extra, NULL);
2685          if (rc == PCRE_ERROR_BADMODE)
2686            {
2687            /* Simulate the result of the function call below. */
2688            fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
2689              use_pcre16? "16" : "", PCRE_INFO_OPTIONS);
2690            fprintf(outfile, "Running in %s-bit mode but pattern was compiled in "
2691              "%s-bit mode\n", use_pcre16? "16":"8", use_pcre16? "8":"16");
2692            continue;
2693            }
2694          }
2695    
2696        /* Need to know if UTF-8 for printing data strings. */
2697    
2698        if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0) continue;
2699        use_utf = (get_options & PCRE_UTF8) != 0;
2700    
2701      fclose(f);      fclose(f);
2702      goto SHOW_INFO;      goto SHOW_INFO;
2703      }      }
2704    
2705    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
2706    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
2707    
2708    delimiter = *p++;    delimiter = *p++;
2709    
2710    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
2711      {      {
2712      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
2713      goto SKIP_DATA;      goto SKIP_DATA;
2714      }      }
2715    
2716    pp = p;    pp = p;
2717    poffset = p - buffer;    poffset = (int)(p - buffer);
2718    
2719    for(;;)    for(;;)
2720      {      {
# Line 1063  while (!done) Line 2725  while (!done)
2725        pp++;        pp++;
2726        }        }
2727      if (*pp != 0) break;      if (*pp != 0) break;
2728      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if ((pp = extend_inputline(infile, pp)) == NULL)  
2729        {        {
2730        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
2731        done = 1;        done = 1;
# Line 1107  while (!done) Line 2768  while (!done)
2768        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
2769        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
2770    
2771        case '+': do_showrest = 1; break;        case '+':
2772          if (do_showrest) do_showcaprest = 1; else do_showrest = 1;
2773          break;
2774    
2775          case '=': do_allcaps = 1; break;
2776        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
2777        case 'B': do_debug = 1; break;        case 'B': do_debug = 1; break;
2778        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
# Line 1117  while (!done) Line 2782  while (!done)
2782        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
2783        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
2784        case 'J': options |= PCRE_DUPNAMES; break;        case 'J': options |= PCRE_DUPNAMES; break;
2785          case 'K': do_mark = 1; break;
2786        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
2787        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
2788    
# Line 1124  while (!done) Line 2790  while (!done)
2790        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
2791  #endif  #endif
2792    
2793        case 'S': do_study = 1; break;        case 'S':
2794        case 'U': options |= PCRE_UNGREEDY; break;        if (do_study == 0)
2795        case 'X': options |= PCRE_EXTRA; break;          {
2796        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;          do_study = 1;
2797        case '?': options |= PCRE_NO_UTF8_CHECK; break;          if (*pp == '+')
2798              {
2799              if (*(++pp) == '+')
2800                {
2801                verify_jit = TRUE;
2802                pp++;
2803                }
2804              if (*pp >= '1' && *pp <= '7')
2805                study_options |= jit_study_bits[*pp++ - '1'];
2806              else
2807                study_options |= jit_study_bits[6];
2808              }
2809            }
2810          else
2811            {
2812            do_study = 0;
2813            no_force_study = 1;
2814            }
2815          break;
2816    
2817          case 'U': options |= PCRE_UNGREEDY; break;
2818          case 'W': options |= PCRE_UCP; break;
2819          case 'X': options |= PCRE_EXTRA; break;
2820          case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2821          case 'Z': debug_lengths = 0; break;
2822          case '8': options |= PCRE_UTF8; use_utf = 1; break;
2823          case '?': options |= PCRE_NO_UTF8_CHECK; break;
2824    
2825          case 'T':
2826          switch (*pp++)
2827            {
2828            case '0': tables = tables0; break;
2829            case '1': tables = tables1; break;
2830    
2831            case '\r':
2832            case '\n':
2833            case ' ':
2834            case 0:
2835            fprintf(outfile, "** Missing table number after /T\n");
2836            goto SKIP_DATA;
2837    
2838            default:
2839            fprintf(outfile, "** Bad table number \"%c\" after /T\n", pp[-1]);
2840            goto SKIP_DATA;
2841            }
2842          break;
2843    
2844        case 'L':        case 'L':
2845        ppp = pp;        ppp = pp;
# Line 1142  while (!done) Line 2853  while (!done)
2853          goto SKIP_DATA;          goto SKIP_DATA;
2854          }          }
2855        locale_set = 1;        locale_set = 1;
2856        tables = pcre_maketables();        tables = PCRE_MAKETABLES;
2857        pp = ppp;        pp = ppp;
2858        break;        break;
2859    
# Line 1155  while (!done) Line 2866  while (!done)
2866    
2867        case '<':        case '<':
2868          {          {
2869          int x = check_newline(pp, outfile);          if (strncmpic(pp, (pcre_uint8 *)"JS>", 3) == 0)
2870          if (x == 0) goto SKIP_DATA;            {
2871          options |= x;            options |= PCRE_JAVASCRIPT_COMPAT;
2872          while (*pp++ != '>');            pp += 3;
2873              }
2874            else
2875              {
2876              int x = check_newline(pp, outfile);
2877              if (x == 0) goto SKIP_DATA;
2878              options |= x;
2879              while (*pp++ != '>');
2880              }
2881          }          }
2882        break;        break;
2883    
# Line 1175  while (!done) Line 2894  while (!done)
2894    
2895    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
2896    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
2897    local character tables. */    local character tables. Neither does it have 16-bit support. */
2898    
2899  #if !defined NOPOSIX  #if !defined NOPOSIX
2900    if (posix || do_posix)    if (posix || do_posix)
# Line 1188  while (!done) Line 2907  while (!done)
2907      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
2908      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;      if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
2909      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;      if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
2910        if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
2911        if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
2912    
2913        first_gotten_store = 0;
2914      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
2915    
2916      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 1208  while (!done) Line 2930  while (!done)
2930  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
2931    
2932      {      {
2933        /* In 16-bit mode, convert the input. */
2934    
2935    #ifdef SUPPORT_PCRE16
2936        if (use_pcre16)
2937          {
2938          switch(to16(FALSE, p, options & PCRE_UTF8, (int)strlen((char *)p)))
2939            {
2940            case -1:
2941            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2942              "converted to UTF-16\n");
2943            goto SKIP_DATA;
2944    
2945            case -2:
2946            fprintf(outfile, "**Failed: character value greater than 0x10ffff "
2947              "cannot be converted to UTF-16\n");
2948            goto SKIP_DATA;
2949    
2950            case -3: /* "Impossible error" when to16 is called arg1 FALSE */
2951            fprintf(outfile, "**Failed: character value greater than 0xffff "
2952              "cannot be converted to 16-bit in non-UTF mode\n");
2953            goto SKIP_DATA;
2954    
2955            default:
2956            break;
2957            }
2958          p = (pcre_uint8 *)buffer16;
2959          }
2960    #endif
2961    
2962        /* Compile many times when timing */
2963    
2964      if (timeit > 0)      if (timeit > 0)
2965        {        {
2966        register int i;        register int i;
# Line 1215  while (!done) Line 2968  while (!done)
2968        clock_t start_time = clock();        clock_t start_time = clock();
2969        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2970          {          {
2971          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2972          if (re != NULL) free(re);          if (re != NULL) free(re);
2973          }          }
2974        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1224  while (!done) Line 2977  while (!done)
2977            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
2978        }        }
2979    
2980      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      first_gotten_store = 0;
2981        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
2982    
2983      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2984      if non-interactive. */      if non-interactive. */
# Line 1237  while (!done) Line 2991  while (!done)
2991          {          {
2992          for (;;)          for (;;)
2993            {            {
2994            if (extend_inputline(infile, buffer) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
2995              {              {
2996              done = 1;              done = 1;
2997              goto CONTINUE;              goto CONTINUE;
# Line 1251  while (!done) Line 3005  while (!done)
3005        goto CONTINUE;        goto CONTINUE;
3006        }        }
3007    
3008      /* Compilation succeeded; print data if required. There are now two      /* Compilation succeeded. It is now possible to set the UTF-8 option from
3009      info-returning functions. The old one has a limited interface and      within the regex; check for this so that we know how to process the data
3010      returns only limited data. Check that it agrees with the newer one. */      lines. */
3011    
3012      if (log_store)      if (new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options) < 0)
3013        fprintf(outfile, "Memory allocation (code space): %d\n",        goto SKIP_DATA;
3014          (int)(gotten_store -      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
               sizeof(real_pcre) -  
               ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));  
3015    
3016      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
3017      and remember the store that was got. */      and remember the store that was got. */
3018    
3019      true_size = ((real_pcre *)re)->size;      true_size = ((REAL_PCRE *)re)->size;
3020      regex_gotten_store = gotten_store;      regex_gotten_store = first_gotten_store;
3021    
3022      /* If /S was present, study the regexp to generate additional info to      /* Output code size information if requested */
     help with the matching. */  
3023    
3024      if (do_study)      if (log_store)
3025          fprintf(outfile, "Memory allocation (code space): %d\n",
3026            (int)(first_gotten_store -
3027                  sizeof(REAL_PCRE) -
3028                  ((REAL_PCRE *)re)->name_count * ((REAL_PCRE *)re)->name_entry_size));
3029    
3030        /* If -s or /S was present, study the regex to generate additional info to
3031        help with the matching, unless the pattern has the SS option, which
3032        suppresses the effect of /S (used for a few test patterns where studying is
3033        never sensible). */
3034    
3035        if (do_study || (force_study >= 0 && !no_force_study))
3036        {        {
3037        if (timeit > 0)        if (timeit > 0)
3038          {          {
# Line 1278  while (!done) Line 3040  while (!done)
3040          clock_t time_taken;          clock_t time_taken;
3041          clock_t start_time = clock();          clock_t start_time = clock();
3042          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
3043            extra = pcre_study(re, study_options, &error);            {
3044              PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3045              }
3046          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3047          if (extra != NULL) free(extra);          if (extra != NULL)
3048              {
3049              PCRE_FREE_STUDY(extra);
3050              }
3051          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
3052            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
3053              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
3054          }          }
3055        extra = pcre_study(re, study_options, &error);        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
3056        if (error != NULL)        if (error != NULL)
3057          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
3058        else if (extra != NULL)        else if (extra != NULL)
3059            {
3060          true_study_size = ((pcre_study_data *)(extra->study_data))->size;          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
3061            if (log_store)
3062              {
3063              size_t jitsize;
3064              if (new_info(re, extra, PCRE_INFO_JITSIZE, &jitsize) == 0 &&
3065                  jitsize != 0)
3066                fprintf(outfile, "Memory allocation (JIT code): %d\n", (int)jitsize);
3067              }
3068            }
3069        }        }
3070    
3071      /* If the 'F' option was present, we flip the bytes of all the integer      /* If /K was present, we set up for handling MARK data. */
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
3072    
3073      if (do_flip)      if (do_mark)
3074        {        {
3075        real_pcre *rre = (real_pcre *)re;        if (extra == NULL)
       rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));  
       rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));  
       rre->name_table_offset = byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));  
   
       if (extra != NULL)  
3076          {          {
3077          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);          extra = (pcre_extra *)malloc(sizeof(pcre_extra));
3078          rsd->size = byteflip(rsd->size, sizeof(rsd->size));          extra->flags = 0;
         rsd->options = byteflip(rsd->options, sizeof(rsd->options));  
3079          }          }
3080          extra->mark = &markptr;
3081          extra->flags |= PCRE_EXTRA_MARK;
3082        }        }
3083    
3084      /* Extract information from the compiled data if required */      /* Extract and display information from the compiled data if required. */
3085    
3086      SHOW_INFO:      SHOW_INFO:
3087    
3088      if (do_debug)      if (do_debug)
3089        {        {
3090        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
3091        pcre_printint(re, outfile);        PCRE_PRINTINT(re, outfile, debug_lengths);
3092        }        }
3093    
3094        /* We already have the options in get_options (see above) */
3095    
3096      if (do_showinfo)      if (do_showinfo)
3097        {        {
3098        unsigned long int get_options, all_options;        unsigned long int all_options;
3099  #if !defined NOINFOCHECK        int count, backrefmax, first_char, need_char, okpartial, jchanged,
3100        int old_first_char, old_options, old_count;          hascrorlf;
 #endif  
       int count, backrefmax, first_char, need_char;  
3101        int nameentrysize, namecount;        int nameentrysize, namecount;
3102        const uschar *nametable;        const pcre_uint8 *nametable;
3103    
3104        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        if (new_info(re, NULL, PCRE_INFO_SIZE, &size) +
3105        new_info(re, NULL, PCRE_INFO_SIZE, &size);            new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count) +
3106        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);            new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax) +
3107        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);            new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char) +
3108        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);            new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char) +
3109        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);            new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize) +
3110        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);            new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount) +
3111        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);            new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable) +
3112        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);            new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial) +
3113              new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged) +
3114  #if !defined NOINFOCHECK            new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf)
3115        old_count = pcre_info(re, &old_options, &old_first_char);            != 0)
3116        if (count < 0) fprintf(outfile,          goto SKIP_DATA;
         "Error %d from pcre_info()\n", count);  
       else  
         {  
         if (old_count != count) fprintf(outfile,  
           "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,  
             old_count);  
   
         if (old_first_char != first_char) fprintf(outfile,  
           "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",  
             first_char, old_first_char);  
   
         if (old_options != (int)get_options) fprintf(outfile,  
           "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",  
             get_options, old_options);  
         }  
 #endif  
3117    
3118        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
3119          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
# Line 1384  while (!done) Line 3128  while (!done)
3128          fprintf(outfile, "Named capturing subpatterns:\n");          fprintf(outfile, "Named capturing subpatterns:\n");
3129          while (namecount-- > 0)          while (namecount-- > 0)
3130            {            {
3131            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3132              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",            int imm2_size = use_pcre16 ? 1 : 2;
3133              GET2(nametable, 0));  #else
3134              int imm2_size = IMM2_SIZE;
3135    #endif
3136              int length = (int)STRLEN(nametable + imm2_size);
3137              fprintf(outfile, "  ");
3138              PCHARSV(nametable, imm2_size, length, outfile);
3139              while (length++ < nameentrysize - imm2_size) putc(' ', outfile);
3140    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
3141              fprintf(outfile, "%3d\n", use_pcre16?
3142                 (int)(((PCRE_SPTR16)nametable)[0])
3143                :((int)nametable[0] << 8) | (int)nametable[1]);
3144              nametable += nameentrysize * (use_pcre16 ? 2 : 1);
3145    #else
3146              fprintf(outfile, "%3d\n", GET2(nametable, 0));
3147    #ifdef SUPPORT_PCRE8
3148            nametable += nameentrysize;            nametable += nameentrysize;
3149    #else
3150              nametable += nameentrysize * 2;
3151    #endif
3152    #endif
3153            }            }
3154          }          }
3155    
3156        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
3157        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
   
       all_options = ((real_pcre *)re)->options;  
       if (do_flip)  
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
          }  
3158    
3159        if ((all_options & PCRE_NOPARTIAL) != 0)        all_options = ((REAL_PCRE *)re)->options;
3160          fprintf(outfile, "Partial matching not supported\n");        if (do_flip) all_options = swap_uint32(all_options);
3161    
3162        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
3163          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
3164            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
3165            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
3166            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
3167            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
3168            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",            ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
3169            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
3170              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
3171              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
3172            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
3173            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
3174            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
3175            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
3176            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
3177            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
3178              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
3179              ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
3180            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
3181    
3182          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
3183    
3184        switch (get_options & PCRE_NEWLINE_BITS)        switch (get_options & PCRE_NEWLINE_BITS)
3185          {          {
3186          case PCRE_NEWLINE_CR:          case PCRE_NEWLINE_CR:
# Line 1433  while (!done) Line 3195  while (!done)
3195          fprintf(outfile, "Forced newline sequence: CRLF\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
3196          break;          break;
3197    
3198            case PCRE_NEWLINE_ANYCRLF:
3199            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
3200            break;
3201    
3202          case PCRE_NEWLINE_ANY:          case PCRE_NEWLINE_ANY:
3203          fprintf(outfile, "Forced newline sequence: ANY\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
3204          break;          break;
# Line 1451  while (!done) Line 3217  while (!done)
3217          }          }
3218        else        else
3219          {          {
3220          int ch = first_char & 255;          const char *caseless =
3221          const char *caseless = ((first_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_FCH_CASELESS) == 0)?
3222            "" : " (caseless)";            "" : " (caseless)";
3223          if (PRINTHEX(ch))  
3224            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(first_char))
3225              fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
3226          else          else
3227            fprintf(outfile, "First char = %d%s\n", ch, caseless);            {
3228              fprintf(outfile, "First char = ");
3229              pchar(first_char, outfile);
3230              fprintf(outfile, "%s\n", caseless);
3231              }
3232          }          }
3233    
3234        if (need_char < 0)        if (need_char < 0)
# Line 1466  while (!done) Line 3237  while (!done)
3237          }          }
3238        else        else
3239          {          {
3240          int ch = need_char & 255;          const char *caseless =
3241          const char *caseless = ((need_char & REQ_CASELESS) == 0)?            ((((REAL_PCRE *)re)->flags & PCRE_RCH_CASELESS) == 0)?
3242            "" : " (caseless)";            "" : " (caseless)";
3243          if (PRINTHEX(ch))  
3244            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);          if (PRINTOK(need_char))
3245              fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
3246          else          else
3247            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            {
3248              fprintf(outfile, "Need char = ");
3249              pchar(need_char, outfile);
3250              fprintf(outfile, "%s\n", caseless);
3251              }
3252          }          }
3253    
3254        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
3255        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
3256        so messes up the test suite. (And with the /F option, it might be        so messes up the test suite. (And with the /F option, it might be
3257        flipped.) */        flipped.) If study was forced by an external -s, don't show this
3258          information unless -i or -d was also present. This means that, except
3259          when auto-callouts are involved, the output from runs with and without
3260          -s should be identical. */
3261    
3262        if (do_study)        if (do_study || (force_study >= 0 && showinfo && !no_force_study))
3263          {          {
3264          if (extra == NULL)          if (extra == NULL)
3265            fprintf(outfile, "Study returned NULL\n");            fprintf(outfile, "Study returned NULL\n");
3266          else          else
3267            {            {
3268            uschar *start_bits = NULL;            pcre_uint8 *start_bits = NULL;
3269            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);            int minlength;
3270    
3271            if (start_bits == NULL)            if (new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength) == 0)
3272              fprintf(outfile, "No starting byte set\n");              fprintf(outfile, "Subject length lower bound = %d\n", minlength);
3273            else  
3274              if (new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits) == 0)
3275              {              {
3276              int i;              if (start_bits == NULL)
3277              int c = 24;                fprintf(outfile, "No set of starting bytes\n");
3278              fprintf(outfile, "Starting byte set: ");              else
             for (i = 0; i < 256; i++)  
3279                {                {
3280                if ((start_bits[i/8] & (1<<(i&7))) != 0)                int i;
3281                  int c = 24;
3282                  fprintf(outfile, "Starting byte set: ");
3283                  for (i = 0; i < 256; i++)
3284                  {                  {
3285                  if (c > 75)                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
                   {  
                   fprintf(outfile, "\n  ");  
                   c = 2;  
                   }  
                 if (PRINTHEX(i) && i != ' ')  
                   {  
                   fprintf(outfile, "%c ", i);  
                   c += 2;  
                   }  
                 else  
3286                    {                    {
3287                    fprintf(outfile, "\\x%02x ", i);                    if (c > 75)
3288                    c += 5;                      {
3289                        fprintf(outfile, "\n  ");
3290                        c = 2;
3291                        }
3292                      if (PRINTOK(i) && i != ' ')
3293                        {
3294                        fprintf(outfile, "%c ", i);
3295                        c += 2;
3296                        }
3297                      else
3298                        {
3299                        fprintf(outfile, "\\x%02x ", i);
3300                        c += 5;
3301                        }
3302                    }                    }
3303                  }                  }
3304                  fprintf(outfile, "\n");
3305                }                }
3306              fprintf(outfile, "\n");              }
3307              }
3308    
3309            /* Show this only if the JIT was set by /S, not by -s. */
3310    
3311            if ((study_options & PCRE_STUDY_JIT_COMPILE) != 0)
3312              {
3313              int jit;
3314              if (new_info(re, extra, PCRE_INFO_JIT, &jit) == 0)
3315                {
3316                if (jit)
3317                  fprintf(outfile, "JIT study was successful\n");
3318                else
3319    #ifdef SUPPORT_JIT
3320                  fprintf(outfile, "JIT study was not successful\n");
3321    #else
3322                  fprintf(outfile, "JIT support is not available in this version of PCRE\n");
3323    #endif
3324              }              }
3325            }            }
3326          }          }
# Line 1536  while (!done) Line 3339  while (!done)
3339          }          }
3340        else        else
3341          {          {
3342          uschar sbuf[8];          pcre_uint8 sbuf[8];
3343          sbuf[0] = (true_size >> 24)  & 255;  
3344          sbuf[1] = (true_size >> 16)  & 255;          if (do_flip) regexflip(re, extra);
3345          sbuf[2] = (true_size >>  8)  & 255;          sbuf[0] = (pcre_uint8)((true_size >> 24) & 255);
3346          sbuf[3] = (true_size)  & 255;          sbuf[1] = (pcre_uint8)((true_size >> 16) & 255);
3347            sbuf[2] = (pcre_uint8)((true_size >>  8) & 255);
3348          sbuf[4] = (true_study_size >> 24)  & 255;          sbuf[3] = (pcre_uint8)((true_size) & 255);
3349          sbuf[5] = (true_study_size >> 16)  & 255;          sbuf[4] = (pcre_uint8)((true_study_size >> 24) & 255);
3350          sbuf[6] = (true_study_size >>  8)  & 255;          sbuf[5] = (pcre_uint8)((true_study_size >> 16) & 255);
3351          sbuf[7] = (true_study_size)  & 255;          sbuf[6] = (pcre_uint8)((true_study_size >>  8) & 255);
3352            sbuf[7] = (pcre_uint8)((true_study_size) & 255);
3353    
3354          if (fwrite(sbuf, 1, 8, f) < 8 ||          if (fwrite(sbuf, 1, 8, f) < 8 ||
3355              fwrite(re, 1, true_size, f) < true_size)              fwrite(re, 1, true_size, f) < true_size)
# Line 1554  while (!done) Line 3358  while (!done)
3358            }            }
3359          else          else
3360            {            {
3361            fprintf(outfile, "Compiled regex written to %s\n", to_file);            fprintf(outfile, "Compiled pattern written to %s\n", to_file);
3362    
3363              /* If there is study data, write it. */
3364    
3365            if (extra != NULL)            if (extra != NULL)
3366              {              {
3367              if (fwrite(extra->study_data, 1, true_study_size, f) <              if (fwrite(extra->study_data, 1, true_study_size, f) <
# Line 1564  while (!done) Line 3371  while (!done)
3371                  strerror(errno));                  strerror(errno));
3372                }                }
3373              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
   
3374              }              }
3375            }            }
3376          fclose(f);          fclose(f);
3377          }          }
3378    
3379        new_free(re);        new_free(re);
3380        if (extra != NULL) new_free(extra);        if (extra != NULL)
3381        if (tables != NULL) new_free((void *)tables);          {
3382            PCRE_FREE_STUDY(extra);
3383            }
3384          if (locale_set)
3385            {
3386            new_free((void *)tables);
3387            setlocale(LC_CTYPE, "C");
3388            locale_set = 0;
3389            }
3390        continue;  /* With next regex */        continue;  /* With next regex */
3391        }        }
3392      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1581  while (!done) Line 3395  while (!done)
3395    
3396    for (;;)    for (;;)
3397      {      {
3398      uschar *q;      pcre_uint8 *q;
3399      uschar *bptr = dbuffer;      pcre_uint8 *bptr;
3400      int *use_offsets = offsets;      int *use_offsets = offsets;
3401      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
3402      int callout_data = 0;      int callout_data = 0;
3403      int callout_data_set = 0;      int callout_data_set = 0;
3404      int count, c;      int count, c;
3405      int copystrings = 0;      int copystrings = 0;
3406      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
3407      int getstrings = 0;      int getstrings = 0;
3408      int getlist = 0;      int getlist = 0;
3409      int gmatched = 0;      int gmatched = 0;
3410      int start_offset = 0;      int start_offset = 0;
3411        int start_offset_sign = 1;
3412      int g_notempty = 0;      int g_notempty = 0;
3413      int use_dfa = 0;      int use_dfa = 0;
3414        int jit_was_used = 0;
     options = 0;  
3415    
3416      *copynames = 0;      *copynames = 0;
3417      *getnames = 0;      *getnames = 0;
3418    
3419      copynamesptr = copynames;  #ifdef SUPPORT_PCRE16
3420      getnamesptr = getnames;      cn16ptr = copynames;
3421        gn16ptr = getnames;
3422    #endif
3423    #ifdef SUPPORT_PCRE8
3424        cn8ptr = copynames8;
3425        gn8ptr = getnames8;
3426    #endif
3427    
3428      pcre_callout = callout;      SET_PCRE_CALLOUT(callout);
3429      first_callout = 1;      first_callout = 1;
3430        last_callout_mark = NULL;
3431      callout_extra = 0;      callout_extra = 0;
3432      callout_count = 0;      callout_count = 0;
3433      callout_fail_count = 999999;      callout_fail_count = 999999;
3434      callout_fail_id = -1;      callout_fail_id = -1;
3435      show_malloc = 0;      show_malloc = 0;
3436        options = 0;
3437    
3438      if (extra != NULL) extra->flags &=      if (extra != NULL) extra->flags &=
3439        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
# Line 1619  while (!done) Line 3441  while (!done)
3441      len = 0;      len = 0;
3442      for (;;)      for (;;)
3443        {        {
3444        if (infile == stdin) printf("data> ");        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
       if (extend_inputline(infile, buffer + len) == NULL)  
3445          {          {
3446          if (len > 0) break;          if (len > 0)    /* Reached EOF without hitting a newline */
3447              {
3448              fprintf(outfile, "\n");
3449              break;
3450              }
3451          done = 1;          done = 1;
3452          goto CONTINUE;          goto CONTINUE;
3453          }          }
# Line 1638  while (!done) Line 3463  while (!done)
3463      p = buffer;      p = buffer;
3464      while (isspace(*p)) p++;      while (isspace(*p)) p++;
3465    
3466      q = dbuffer;      bptr = q = dbuffer;
3467      while ((c = *p++) != 0)      while ((c = *p++) != 0)
3468        {        {
3469        int i = 0;        int i = 0;
3470        int n = 0;        int n = 0;
3471    
3472        if (c == '\\') switch ((c = *p++))        /* In UTF mode, input can be UTF-8, so just copy all non-backslash bytes.
3473          In non-UTF mode, allow the value of the byte to fall through to later,
3474          where values greater than 127 are turned into UTF-8 when running in
3475          16-bit mode. */
3476    
3477          if (c != '\\')
3478            {
3479            if (use_utf)
3480              {
3481              *q++ = c;
3482              continue;
3483              }
3484            }
3485    
3486          /* Handle backslash escapes */
3487    
3488          else switch ((c = *p++))
3489          {          {
3490          case 'a': c =    7; break;          case 'a': c =    7; break;
3491          case 'b': c = '\b'; break;          case 'b': c = '\b'; break;
# Line 1660  while (!done) Line 3501  while (!done)
3501          c -= '0';          c -= '0';
3502          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
3503            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
   
 #if !defined NOUTF8  
         if (use_utf8 && c > 255)  
           {  
           unsigned char buff8[8];  
           int ii, utn;  
           utn = ord2utf8(c, buff8);  
           for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
           c = buff8[ii];   /* Last byte */  
           }  
 #endif  
3504          break;          break;
3505    
3506          case 'x':          case 'x':
   
         /* Handle \x{..} specially - new Perl thing for utf8 */  
   
 #if !defined NOUTF8  
3507          if (*p == '{')          if (*p == '{')
3508            {            {
3509            unsigned char *pt = p;            pcre_uint8 *pt = p;
3510            c = 0;            c = 0;
3511            while (isxdigit(*(++pt)))  
3512              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
3513              when isxdigit() is a macro that refers to its argument more than
3514              once. This is banned by the C Standard, but apparently happens in at
3515              least one MacOS environment. */
3516    
3517              for (pt++; isxdigit(*pt); pt++)
3518                {
3519                if (++i == 9)
3520                  fprintf(outfile, "** Too many hex digits in \\x{...} item; "
3521                                   "using only the first eight.\n");
3522                else c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
3523                }
3524            if (*pt == '}')            if (*pt == '}')
3525              {              {
             unsigned char buff8[8];  
             int ii, utn;  
             utn = ord2utf8(c, buff8);  
             for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];  
             c = buff8[ii];   /* Last byte */  
3526              p = pt + 1;              p = pt + 1;
3527              break;              break;
3528              }              }
3529            /* Not correct form; fall through */            /* Not correct form for \x{...}; fall through */
3530            }            }
 #endif  
3531    
3532          /* Ordinary \x */          /* \x without {} always defines just one byte in 8-bit mode. This
3533            allows UTF-8 characters to be constructed byte by byte, and also allows
3534            invalid UTF-8 sequences to be made. Just copy the byte in UTF mode.
3535            Otherwise, pass it down to later code so that it can be turned into
3536            UTF-8 when running in 16-bit mode. */
3537    
3538          c = 0;          c = 0;
3539          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
3540            {            {
3541            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');            c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'a' - 10);
3542            p++;            p++;
3543            }            }
3544            if (use_utf)
3545              {
3546              *q++ = c;
3547              continue;
3548              }
3549          break;          break;
3550    
3551          case 0:   /* \ followed by EOF allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
# Line 1713  while (!done) Line 3553  while (!done)
3553          continue;          continue;
3554    
3555          case '>':          case '>':
3556            if (*p == '-')
3557              {
3558              start_offset_sign = -1;
3559              p++;
3560              }
3561          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';          while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
3562            start_offset *= start_offset_sign;
3563          continue;          continue;
3564    
3565          case 'A':  /* Option setting */          case 'A':  /* Option setting */
# Line 1732  while (!done) Line 3578  while (!done)
3578            }            }
3579          else if (isalnum(*p))          else if (isalnum(*p))
3580            {            {
3581            uschar *npp = copynamesptr;            READ_CAPTURE_NAME(p, &cn8ptr, &cn16ptr, re);
           while (isalnum(*p)) *npp++ = *p++;  
           *npp++ = 0;  
           *npp = 0;  
           n = pcre_get_stringnumber(re, (char *)copynamesptr);  
           if (n < 0)  
             fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);  
           copynamesptr = npp;  
3582            }            }
3583          else if (*p == '+')          else if (*p == '+')
3584            {            {
# Line 1748  while (!done) Line 3587  while (!done)
3587            }            }
3588          else if (*p == '-')          else if (*p == '-')
3589            {            {
3590            pcre_callout = NULL;            SET_PCRE_CALLOUT(NULL);
3591            p++;            p++;
3592            }            }
3593          else if (*p == '!')          else if (*p == '!')
# Line 1786  while (!done) Line 3625  while (!done)
3625  #endif  #endif
3626            use_dfa = 1;            use_dfa = 1;
3627          continue;          continue;
3628    #endif
3629    
3630    #if !defined NODFA
3631          case 'F':          case 'F':
3632          options |= PCRE_DFA_SHORTEST;          options |= PCRE_DFA_SHORTEST;
3633          continue;          continue;
# Line 1800  while (!done) Line 3641  while (!done)
3641            }            }
3642          else if (isalnum(*p))          else if (isalnum(*p))
3643            {            {
3644            uschar *npp = getnamesptr;            READ_CAPTURE_NAME(p, &gn8ptr, &gn16ptr, re);
3645            while (isalnum(*p)) *npp++ = *p++;            }
3646            *npp++ = 0;          continue;
3647            *npp = 0;  
3648            n = pcre_get_stringnumber(re, (char *)getnamesptr);          case 'J':
3649            if (n < 0)          while(isdigit(*p)) n = n * 10 + *p++ - '0';
3650              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);          if (extra != NULL
3651            getnamesptr = npp;              && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0
3652                && extra->executable_jit != NULL)
3653              {
3654              if (jit_stack != NULL) { PCRE_JIT_STACK_FREE(jit_stack); }
3655              jit_stack = PCRE_JIT_STACK_ALLOC(1, n * 1024);
3656              PCRE_ASSIGN_JIT_STACK(extra, jit_callback, jit_stack);
3657            }            }
3658          continue;          continue;
3659    
# Line 1820  while (!done) Line 3666  while (!done)
3666          continue;          continue;
3667    
3668          case 'N':          case 'N':
3669          options |= PCRE_NOTEMPTY;          if ((options & PCRE_NOTEMPTY) != 0)
3670              options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
3671            else
3672              options |= PCRE_NOTEMPTY;
3673          continue;          continue;
3674    
3675          case 'O':          case 'O':
# Line 1833  while (!done) Line 3682  while (!done)
3682            if (offsets == NULL)            if (offsets == NULL)
3683              {              {
3684              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
3685                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
3686              yield = 1;              yield = 1;
3687              goto EXIT;              goto EXIT;
3688              }              }
# Line 1843  while (!done) Line 3692  while (!done)
3692          continue;          continue;
3693    
3694          case 'P':          case 'P':
3695          options |= PCRE_PARTIAL;          options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
3696              PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
3697          continue;          continue;
3698    
3699          case 'Q':          case 'Q':
# Line 1878  while (!done) Line 3728  while (!done)
3728          show_malloc = 1;          show_malloc = 1;
3729          continue;          continue;
3730    
3731            case 'Y':
3732            options |= PCRE_NO_START_OPTIMIZE;
3733            continue;
3734    
3735          case 'Z':          case 'Z':
3736          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
3737          continue;          continue;
# Line 1895  while (!done) Line 3749  while (!done)
3749            }            }
3750          continue;          continue;
3751          }          }
3752        *q++ = c;  
3753          /* We now have a character value in c that may be greater than 255. In
3754          16-bit mode, we always convert characters to UTF-8 so that values greater
3755          than 255 can be passed to non-UTF 16-bit strings. In 8-bit mode we
3756          convert to UTF-8 if we are in UTF mode. Values greater than 127 in UTF
3757          mode must have come from \x{...} or octal constructs because values from
3758          \x.. get this far only in non-UTF mode. */
3759    
3760    #if !defined NOUTF || defined SUPPORT_PCRE16
3761          if (use_pcre16 || use_utf)
3762            {
3763            pcre_uint8 buff8[8];
3764            int ii, utn;
3765            utn = ord2utf8(c, buff8);
3766            for (ii = 0; ii < utn; ii++) *q++ = buff8[ii];
3767            }
3768          else
3769    #endif
3770            {
3771            if (c > 255)
3772              {
3773              fprintf(outfile, "** Character \\x{%x} is greater than 255 "
3774                "and UTF-8 mode is not enabled.\n", c);
3775              fprintf(outfile, "** Truncation will probably give the wrong "
3776                "result.\n");
3777              }
3778            *q++ = c;
3779            }
3780        }        }
3781    
3782        /* Reached end of subject string */
3783    
3784      *q = 0;      *q = 0;
3785      len = q - dbuffer;      len = (int)(q - dbuffer);
3786    
3787        /* Move the data to the end of the buffer so that a read over the end of
3788        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
3789        we are using the POSIX interface, we must include the terminating zero. */