/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 55 by nigel, Sat Feb 24 21:39:46 2007 UTC revision 199 by ph10, Tue Jul 31 14:39:09 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    
52    /* A number of things vary for Windows builds. Originally, pcretest opened its
53    input and output without "b"; then I was told that "b" was needed in some
54    environments, so it was added for release 5.0 to both the input and output. (It
55    makes no difference on Unix-like systems.) Later I was told that it is wrong
56    for the input on Windows. I've now abstracted the modes into two macros that
57    are set here, to make it easier to fiddle with them, and removed "b" from the
58    input mode under Windows. */
59    
60    #if defined(_WIN32) || defined(WIN32)
61    #include <io.h>                /* For _setmode() */
62    #include <fcntl.h>             /* For _O_BINARY */
63    #define INPUT_MODE   "r"
64    #define OUTPUT_MODE  "wb"
65    
66    #else
67    #include <sys/time.h>          /* These two includes are needed */
68    #include <sys/resource.h>      /* for setrlimit(). */
69    #define INPUT_MODE   "rb"
70    #define OUTPUT_MODE  "wb"
71    #endif
72    
73    
74  /* Use the internal info for displaying the results of pcre_study(). */  /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76    internal macros, structures, and other internal data values; pcretest has
77    "inside information" compared to a program that strictly follows the PCRE API.
78    
79    Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84    #include "pcre_internal.h"
85    
86    /* We need access to the data tables that PCRE uses. So as not to have to keep
87    two copies, we include the source file here, changing the names of the external
88    symbols to prevent clashes. */
89    
90    #define _pcre_utf8_table1      utf8_table1
91    #define _pcre_utf8_table1_size utf8_table1_size
92    #define _pcre_utf8_table2      utf8_table2
93    #define _pcre_utf8_table3      utf8_table3
94    #define _pcre_utf8_table4      utf8_table4
95    #define _pcre_utt              utt
96    #define _pcre_utt_size         utt_size
97    #define _pcre_OP_lengths       OP_lengths
98    
99    #include "pcre_tables.c"
100    
101    /* We also need the pcre_printint() function for printing out compiled
102    patterns. This function is in a separate file so that it can be included in
103    pcre_compile.c when that module is compiled with debugging enabled.
104    
105    The definition of the macro PRINTABLE, which determines whether to print an
106    output character as-is or as a hex value when showing compiled patterns, is
107    contained in this file. We use it here also, in cases when the locale has not
108    been explicitly changed, so as to get consistent output from systems that
109    differ in their output from isprint() even in the "C" locale. */
110    
111    #include "pcre_printint.src"
112    
113    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114    
 #include "internal.h"  
115    
116  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
117  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124    /* It is also possible, for the benefit of the version currently imported into
125    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126    interface to the DFA matcher (NODFA), and without the doublecheck of the old
127    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137    /* Other parameters */
138    
139  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
140  #ifdef CLK_TCK  #ifdef CLK_TCK
141  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 144  Makefile. */
144  #endif  #endif
145  #endif  #endif
146    
147  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
148    
149    #define LOOPREPEAT 500000
150    
151    /* Static variables */
152    
153  static FILE *outfile;  static FILE *outfile;
154  static int log_store = 0;  static int log_store = 0;
155    static int callout_count;
156    static int callout_extra;
157    static int callout_fail_count;
158    static int callout_fail_id;
159    static int first_callout;
160    static int locale_set = 0;
161    static int show_malloc;
162    static int use_utf8;
163  static size_t gotten_store;  static size_t gotten_store;
164    
165    /* The buffers grow automatically if very long input lines are encountered. */
166    
167    static int buffer_size = 50000;
168    static uschar *buffer = NULL;
169    static uschar *dbuffer = NULL;
170    static uschar *pbuffer = NULL;
171    
172    
173    
174    /*************************************************
175    *        Read or extend an input line            *
176    *************************************************/
177    
178    /* Input lines are read into buffer, but both patterns and data lines can be
179    continued over multiple input lines. In addition, if the buffer fills up, we
180    want to automatically expand it so as to be able to handle extremely large
181    lines that are needed for certain stress tests. When the input buffer is
182    expanded, the other two buffers must also be expanded likewise, and the
183    contents of pbuffer, which are a copy of the input for callouts, must be
184    preserved (for when expansion happens for a data line). This is not the most
185    optimal way of handling this, but hey, this is just a test program!
186    
187    Arguments:
188      f            the file to read
189      start        where in buffer to start (this *must* be within buffer)
190    
191    Returns:       pointer to the start of new data
192                   could be a copy of start, or could be moved
193                   NULL if no data read and EOF reached
194    */
195    
196    static uschar *
197    extend_inputline(FILE *f, uschar *start)
198    {
199    uschar *here = start;
200    
201    for (;;)
202      {
203      int rlen = buffer_size - (here - buffer);
204    
205      if (rlen > 1000)
206        {
207        int dlen;
208        if (fgets((char *)here, rlen,  f) == NULL)
209          return (here == start)? NULL : start;
210        dlen = (int)strlen((char *)here);
211        if (dlen > 0 && here[dlen - 1] == '\n') return start;
212        here += dlen;
213        }
214    
215      else
216        {
217        int new_buffer_size = 2*buffer_size;
218        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
219        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
220        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
221    
222        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
223          {
224          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
225          exit(1);
226          }
227    
228        memcpy(new_buffer, buffer, buffer_size);
229        memcpy(new_pbuffer, pbuffer, buffer_size);
230    
231        buffer_size = new_buffer_size;
232    
233        start = new_buffer + (start - buffer);
234        here = new_buffer + (here - buffer);
235    
236        free(buffer);
237        free(dbuffer);
238        free(pbuffer);
239    
240        buffer = new_buffer;
241        dbuffer = new_dbuffer;
242        pbuffer = new_pbuffer;
243        }
244      }
245    
246    return NULL;  /* Control never gets here */
247    }
248    
249    
250    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
251    
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
252    
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
253    
254    
255  /*************************************************  /*************************************************
256  *       Convert character value to UTF-8         *  *          Read number from string               *
257  *************************************************/  *************************************************/
258    
259  /* This function takes an integer value in the range 0 - 0x7fffffff  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
260  and encodes it as a UTF-8 character in 0 to 6 bytes.  around with conditional compilation, just do the job by hand. It is only used
261    for unpicking arguments, so just keep it simple.
262    
263  Arguments:  Arguments:
264    cvalue     the character value    str           string to be converted
265    buffer     pointer to buffer for result - at least 6 bytes long    endptr        where to put the end pointer
266    
267  Returns:     number of characters placed in the buffer  Returns:        the unsigned long
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
268  */  */
269    
270  static int  static int
271  ord2utf8(int cvalue, unsigned char *buffer)  get_value(unsigned char *str, unsigned char **endptr)
272  {  {
273  register int i, j;  int result = 0;
274  for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  while(*str != 0 && isspace(*str)) str++;
275    if (cvalue <= utf8_table1[i]) break;  while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
276  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  *endptr = str;
277  if (cvalue < 0) return -1;  return(result);
 *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
 cvalue >>= 6 - i;  
 for (j = 0; j < i; j++)  
   {  
   *buffer++ = 0x80 | (cvalue & 0x3f);  
   cvalue >>= 6;  
   }  
 return i + 1;  
278  }  }
279    
280    
281    
282    
283  /*************************************************  /*************************************************
284  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
285  *************************************************/  *************************************************/
# Line 92  return i + 1; Line 288  return i + 1;
288  and returns the value of the character.  and returns the value of the character.
289    
290  Argument:  Argument:
291    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
292    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
293    
294  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
295             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
296  */  */
297    
298  int  #if !defined NOUTF8
299  utf82ord(unsigned char *buffer, int *vptr)  
300    static int
301    utf82ord(unsigned char *utf8bytes, int *vptr)
302  {  {
303  int c = *buffer++;  int c = *utf8bytes++;
304  int d = c;  int d = c;
305  int i, j, s;  int i, j, s;
306    
# Line 117  if (i == 0 || i == 6) return 0; / Line 315  if (i == 0 || i == 6) return 0; /
315    
316  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
317    
318  d = c & utf8_table3[i];  s = 6*i;
319  s = 6 - i;  d = (c & utf8_table3[i]) << s;
320    
321  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
322    {    {
323    c = *buffer++;    c = *utf8bytes++;
324    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
325      s -= 6;
326    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
327    }    }
328    
329  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
330    
331  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
332    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
333  if (j != i) return -(i+1);  if (j != i) return -(i+1);
334    
# Line 140  if (j != i) return -(i+1); Line 338  if (j != i) return -(i+1);
338  return i+1;  return i+1;
339  }  }
340    
341    #endif
342    
343    
344    
345    /*************************************************
346    *       Convert character value to UTF-8         *
347    *************************************************/
348    
349    /* This function takes an integer value in the range 0 - 0x7fffffff
350    and encodes it as a UTF-8 character in 1 to 6 bytes.
351    
352  /* Debugging function to print the internal form of the regex. This is the same  Arguments:
353  code as contained in pcre.c under the DEBUG macro. */    cvalue     the character value
354      utf8bytes  pointer to buffer for result - at least 6 bytes long
355    
356  static const char *OP_names[] = {  Returns:     number of characters placed in the buffer
357    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  */
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
358    
359    #if !defined NOUTF8
360    
361  static void print_internals(pcre *re)  static int
362    ord2utf8(int cvalue, uschar *utf8bytes)
363  {  {
364  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
365    for (i = 0; i < utf8_table1_size; i++)
366  fprintf(outfile, "------------------------------------------------------------------\n");    if (cvalue <= utf8_table1[i]) break;
367    utf8bytes += i;
368    for (j = i; j > 0; j--)
369     {
370     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
371     cvalue >>= 6;
372     }
373    *utf8bytes = utf8_table2[i] | cvalue;
374    return i + 1;
375    }
376    
377  for(;;)  #endif
   {  
   int c;  
   int charlength;  
378    
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
379    
   if (*code >= OP_BRA)  
     {  
     if (*code - OP_BRA > EXTRACT_BASIC_MAX)  
       fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
     else  
       fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
380    
381    else switch(*code)  /*************************************************
382      {  *             Print character string             *
383      case OP_END:  *************************************************/
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
384    
385        CLASS_REF_REPEAT:  /* Character string printing function. Must handle UTF-8 strings in utf8
386    mode. Yields number of characters printed. If handed a NULL file, just counts
387    chars without printing. */
388    
389        switch(*code)  static int pchars(unsigned char *p, int length, FILE *f)
390          {  {
391          case OP_CRSTAR:  int c = 0;
392          case OP_CRMINSTAR:  int yield = 0;
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
393    
394          case OP_CRRANGE:  while (length-- > 0)
395          case OP_CRMINRANGE:    {
396          min = (code[1] << 8) + code[2];  #if !defined NOUTF8
397          max = (code[3] << 8) + code[4];    if (use_utf8)
398          if (max == 0) fprintf(outfile, "{%d,}", min);      {
399          else fprintf(outfile, "{%d,%d}", min, max);      int rc = utf82ord(p, &c);
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
400    
401          default:      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
402          code--;        {
403          length -= rc - 1;
404          p += rc;
405          if (PRINTHEX(c))
406            {
407            if (f != NULL) fprintf(f, "%c", c);
408            yield++;
409            }
410          else
411            {
412            int n = 4;
413            if (f != NULL) fprintf(f, "\\x{%02x}", c);
414            yield += (n <= 0x000000ff)? 2 :
415                     (n <= 0x00000fff)? 3 :
416                     (n <= 0x0000ffff)? 4 :
417                     (n <= 0x000fffff)? 5 : 6;
418          }          }
419          continue;
420        }        }
421      break;      }
422    #endif
423    
424      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
425    
426      default:    c = *p++;
427      fprintf(outfile, "    %s", OP_names[*code]);    if (PRINTHEX(c))
428      break;      {
429        if (f != NULL) fprintf(f, "%c", c);
430        yield++;
431        }
432      else
433        {
434        if (f != NULL) fprintf(f, "\\x%02x", c);
435        yield += 4;
436      }      }
   
   code++;  
   fprintf(outfile, "\n");  
437    }    }
438    
439    return yield;
440  }  }
441    
442    
443    
444  /* Character string printing function. A "normal" and a UTF-8 version. */  /*************************************************
445    *              Callout function                  *
446    *************************************************/
447    
448    /* Called from PCRE as a result of the (?C) item. We print out where we are in
449    the match. Yield zero unless more callouts than the fail count, or the callout
450    data is not zero. */
451    
452  static void pchars(unsigned char *p, int length, int utf8)  static int callout(pcre_callout_block *cb)
453  {  {
454  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
455  while (length-- > 0)  int i, pre_start, post_start, subject_length;
456    
457    if (callout_extra)
458    {    {
459    if (utf8)    fprintf(f, "Callout %d: last capture = %d\n",
460        cb->callout_number, cb->capture_last);
461    
462      for (i = 0; i < cb->capture_top * 2; i += 2)
463      {      {
464      int rc = utf82ord(p, &c);      if (cb->offset_vector[i] < 0)
465      if (rc > 0)        fprintf(f, "%2d: <unset>\n", i/2);
466        else
467        {        {
468        length -= rc - 1;        fprintf(f, "%2d: ", i/2);
469        p += rc;        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
470        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
471          else fprintf(outfile, "\\x{%02x}", c);        fprintf(f, "\n");
       continue;  
472        }        }
473      }      }
474      }
475    
476     /* Not UTF-8, or malformed UTF-8  */  /* Re-print the subject in canonical form, the first time or if giving full
477    details. On subsequent calls in the same match, we use pchars just to find the
478    printed lengths of the substrings. */
479    
480    if (f != NULL) fprintf(f, "--->");
481    
482    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
483    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
484      cb->current_position - cb->start_match, f);
485    
486    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
487    
488    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
489      cb->subject_length - cb->current_position, f);
490    
491    if (f != NULL) fprintf(f, "\n");
492    
493    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  /* Always print appropriate indicators, with callout number if not already
494      else fprintf(outfile, "\\x%02x", c);  shown. For automatic callouts, show the pattern offset. */
495    
496    if (cb->callout_number == 255)
497      {
498      fprintf(outfile, "%+3d ", cb->pattern_position);
499      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
500      }
501    else
502      {
503      if (callout_extra) fprintf(outfile, "    ");
504        else fprintf(outfile, "%3d ", cb->callout_number);
505      }
506    
507    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
508    fprintf(outfile, "^");
509    
510    if (post_start > 0)
511      {
512      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
513      fprintf(outfile, "^");
514    }    }
515    
516    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
517      fprintf(outfile, " ");
518    
519    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
520      pbuffer + cb->pattern_position);
521    
522    fprintf(outfile, "\n");
523    first_callout = 0;
524    
525    if (cb->callout_data != NULL)
526      {
527      int callout_data = *((int *)(cb->callout_data));
528      if (callout_data != 0)
529        {
530        fprintf(outfile, "Callout data = %d\n", callout_data);
531        return callout_data;
532        }
533      }
534    
535    return (cb->callout_number != callout_fail_id)? 0 :
536           (++callout_count >= callout_fail_count)? 1 : 0;
537  }  }
538    
539    
540    /*************************************************
541    *            Local malloc functions              *
542    *************************************************/
543    
544  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
545  compiled re. */  compiled re. */
546    
547  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
548  {  {
549    void *block = malloc(size);
550  gotten_store = size;  gotten_store = size;
551  if (log_store)  if (show_malloc)
552    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
553      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
554  return malloc(size);  }
555    
556    static void new_free(void *block)
557    {
558    if (show_malloc)
559      fprintf(outfile, "free             %p\n", block);
560    free(block);
561    }
562    
563    
564    /* For recursion malloc/free, to test stacking calls */
565    
566    static void *stack_malloc(size_t size)
567    {
568    void *block = malloc(size);
569    if (show_malloc)
570      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
571    return block;
572  }  }
573    
574    static void stack_free(void *block)
575    {
576    if (show_malloc)
577      fprintf(outfile, "stack_free       %p\n", block);
578    free(block);
579    }
580    
581    
582    /*************************************************
583    *          Call pcre_fullinfo()                  *
584    *************************************************/
585    
586  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
587    
# Line 419  if ((rc = pcre_fullinfo(re, study, optio Line 594  if ((rc = pcre_fullinfo(re, study, optio
594    
595    
596    
597    /*************************************************
598    *         Byte flipping function                 *
599    *************************************************/
600    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value       the value whose low-order bytes are to be reversed
  n           2 to swap the two low-order bytes; any other value reverses
                the four low-order bytes

Returns:      the reordered value; bytes of value above those being
                reordered do not contribute to the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2 = (value >> 16) & 0xff;
unsigned long int b3 = (value >> 24) & 0xff;

if (n == 2)
  {
  return (b0 << 8) | b1;
  }

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
610    
611    
612    
613    
614    /*************************************************
615    *        Check match or recursion limit          *
616    *************************************************/
617    
618    static int
619    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
620      int start_offset, int options, int *use_offsets, int use_size_offsets,
621      int flag, unsigned long int *limit, int errnumber, const char *msg)
622    {
623    int count;
624    int min = 0;
625    int mid = 64;
626    int max = -1;
627    
628    extra->flags |= flag;
629    
630    for (;;)
631      {
632      *limit = mid;
633    
634      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
635        use_offsets, use_size_offsets);
636    
637      if (count == errnumber)
638        {
639        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
640        min = mid;
641        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
642        }
643    
644      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
645                             count == PCRE_ERROR_PARTIAL)
646        {
647        if (mid == min + 1)
648          {
649          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
650          break;
651          }
652        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
653        max = mid;
654        mid = (min + mid)/2;
655        }
656      else break;    /* Some other error */
657      }
658    
659    extra->flags &= ~flag;
660    return count;
661    }
662    
663    
664    
665    /*************************************************
666    *         Check newline indicator                *
667    *************************************************/
668    
669    /* This is used both at compile and run-time to check for <xxx> escapes, where
670    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
671    no match.
672    
673    Arguments:
674      p           points after the leading '<'
675      f           file for error message
676    
677    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
678    */
679    
680    static int
681    check_newline(uschar *p, FILE *f)
682    {
683    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
684    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
685    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
686    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
687    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
688    fprintf(f, "Unknown newline type at: <%s\n", p);
689    return 0;
690    }
691    
692    
693    
694    /*************************************************
695    *             Usage function                     *
696    *************************************************/
697    
/* Write the command-line option summary to stdout. The -dfa and -p lines are
omitted when NODFA or NOPOSIX, respectively, is defined. */

static void
usage(void)
{
printf("Usage:     pcretest [options] [<input> [<output>]]\n");
printf("  -b       show compiled code (bytecode)\n"
       "  -C       show PCRE compile-time options and exit\n"
       "  -d       debug: show compiled code and information (-b and -i)\n");
#if !defined NODFA
printf("  -dfa     force DFA matching for all subjects\n");
#endif
printf("  -help    show usage information\n"
       "  -i       show information about compiled patterns\n"
       "  -m       output memory used information\n"
       "  -o <n>   set size of offsets vector to <n>\n");
#if !defined NOPOSIX
printf("  -p       use POSIX interface\n");
#endif
printf("  -q       quiet: do not output PCRE version number at start\n"
       "  -S <n>   set stack size to <n> megabytes\n"
       "  -s       output store (memory) used information\n"
       "  -t       time compilation and execution\n"
       "  -t <n>   time compilation and execution, repeating <n> times\n"
       "  -tm      time execution (matching) only\n"
       "  -tm <n>  time execution (matching) only, repeating <n> times\n");
}
723    
724    
725    
726    /*************************************************
727    *                Main Program                    *
728    *************************************************/
729    
730  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
731  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 431  int options = 0; Line 738  int options = 0;
738  int study_options = 0;  int study_options = 0;
739  int op = 1;  int op = 1;
740  int timeit = 0;  int timeit = 0;
741    int timeitm = 0;
742  int showinfo = 0;  int showinfo = 0;
743  int showstore = 0;  int showstore = 0;
744    int quiet = 0;
745  int size_offsets = 45;  int size_offsets = 45;
746  int size_offsets_max;  int size_offsets_max;
747  int *offsets;  int *offsets = NULL;
748  #if !defined NOPOSIX  #if !defined NOPOSIX
749  int posix = 0;  int posix = 0;
750  #endif  #endif
751  int debug = 0;  int debug = 0;
752  int done = 0;  int done = 0;
753  unsigned char buffer[30000];  int all_use_dfa = 0;
754  unsigned char dbuffer[1024];  int yield = 0;
755    int stack_size;
756    
757    /* These vectors store, end-to-end, a list of captured substring names. Assume
758    that 1024 is plenty long enough for the few names we'll be testing. */
759    
760    uschar copynames[1024];
761    uschar getnames[1024];
762    
763    uschar *copynamesptr;
764    uschar *getnamesptr;
765    
766    /* Get buffers from malloc() so that Electric Fence will check their misuse
767    when I am debugging. They grow automatically when very long lines are read. */
768    
769    buffer = (unsigned char *)malloc(buffer_size);
770    dbuffer = (unsigned char *)malloc(buffer_size);
771    pbuffer = (unsigned char *)malloc(buffer_size);
772    
773  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
774    
775  outfile = stdout;  outfile = stdout;
776    
777    /* The following  _setmode() stuff is some Windows magic that tells its runtime
778    library to translate CRLF into a single LF character. At least, that's what
779    I've been told: never having used Windows I take this all on trust. Originally
780    it set 0x8000, but then I was advised that _O_BINARY was better. */
781    
782    #if defined(_WIN32) || defined(WIN32)
783    _setmode( _fileno( stdout ), _O_BINARY );
784    #endif
785    
786  /* Scan options */  /* Scan options */
787    
788  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
789    {    {
790    char *endptr;    unsigned char *endptr;
791    
792    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
793      showstore = 1;      showstore = 1;
794    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
795      else if (strcmp(argv[op], "-b") == 0) debug = 1;
796    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
797    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
798    #if !defined NODFA
799      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
800    #endif
801    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
802        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
803            *endptr == 0))
804        {
805        op++;
806        argc--;
807        }
808      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
809        {
810        int both = argv[op][2] == 0;
811        int temp;
812        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
813                         *endptr == 0))
814          {
815          timeitm = temp;
816          op++;
817          argc--;
818          }
819        else timeitm = LOOPREPEAT;
820        if (both) timeit = timeitm;
821        }
822      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
823          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
824            *endptr == 0))
825      {      {
826    #if defined(_WIN32) || defined(WIN32)
827        printf("PCRE: -S not supported on this OS\n");
828        exit(1);
829    #else
830        int rc;
831        struct rlimit rlim;
832        getrlimit(RLIMIT_STACK, &rlim);
833        rlim.rlim_cur = stack_size * 1024 * 1024;
834        rc = setrlimit(RLIMIT_STACK, &rlim);
835        if (rc != 0)
836          {
837        printf("PCRE: setrlimit() failed with error %d\n", rc);
838        exit(1);
839          }
840      op++;      op++;
841      argc--;      argc--;
842    #endif
843      }      }
844  #if !defined NOPOSIX  #if !defined NOPOSIX
845    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
846  #endif  #endif
847      else if (strcmp(argv[op], "-C") == 0)
848        {
849        int rc;
850        printf("PCRE version %s\n", pcre_version());
851        printf("Compiled with\n");
852        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
853        printf("  %sUTF-8 support\n", rc? "" : "No ");
854        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
855        printf("  %sUnicode properties support\n", rc? "" : "No ");
856        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
857        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
858          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
859          (rc == -2)? "ANYCRLF" :
860          (rc == -1)? "ANY" : "???");
861        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
862        printf("  Internal link size = %d\n", rc);
863        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
864        printf("  POSIX malloc threshold = %d\n", rc);
865        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
866        printf("  Default match limit = %d\n", rc);
867        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
868        printf("  Default recursion depth limit = %d\n", rc);
869        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
870        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
871        goto EXIT;
872        }
873      else if (strcmp(argv[op], "-help") == 0 ||
874               strcmp(argv[op], "--help") == 0)
875        {
876        usage();
877        goto EXIT;
878        }
879    else    else
880      {      {
881      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
882      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
883      printf("  -d     debug: show compiled code; implies -i\n"      yield = 1;
884             "  -i     show information about compiled pattern\n"      goto EXIT;
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
885      }      }
886    op++;    op++;
887    argc--;    argc--;
# Line 489  while (argc > 1 && argv[op][0] == '-') Line 890  while (argc > 1 && argv[op][0] == '-')
890  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
891    
892  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
893  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
894  if (offsets == NULL)  if (offsets == NULL)
895    {    {
896    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
897      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
898    return 1;    yield = 1;
899      goto EXIT;
900    }    }
901    
902  /* Sort out the input and output files */  /* Sort out the input and output files */
903    
904  if (argc > 1)  if (argc > 1)
905    {    {
906    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
907    if (infile == NULL)    if (infile == NULL)
908      {      {
909      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
910      return 1;      yield = 1;
911        goto EXIT;
912      }      }
913    }    }
914    
915  if (argc > 2)  if (argc > 2)
916    {    {
917    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
918    if (outfile == NULL)    if (outfile == NULL)
919      {      {
920      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
921      return 1;      yield = 1;
922        goto EXIT;
923      }      }
924    }    }
925    
926  /* Set alternative malloc function */  /* Set alternative malloc function */
927    
928  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
929    pcre_free = new_free;
930    pcre_stack_malloc = stack_malloc;
931    pcre_stack_free = stack_free;
932    
933  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
934    
935  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
936    
937  /* Main loop */  /* Main loop */
938    
# Line 541  while (!done) Line 948  while (!done)
948    
949    const char *error;    const char *error;
950    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
951      unsigned char *to_file = NULL;
952    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
953      unsigned long int true_size, true_study_size = 0;
954      size_t size, regex_gotten_store;
955    int do_study = 0;    int do_study = 0;
956    int do_debug = debug;    int do_debug = debug;
957      int debug_lengths = 1;
958    int do_G = 0;    int do_G = 0;
959    int do_g = 0;    int do_g = 0;
960    int do_showinfo = showinfo;    int do_showinfo = showinfo;
961    int do_showrest = 0;    int do_showrest = 0;
962    int utf8 = 0;    int do_flip = 0;
963    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
964    
965      use_utf8 = 0;
966    
967    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
968    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
969    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
970      fflush(outfile);
971    
972    p = buffer;    p = buffer;
973    while (isspace(*p)) p++;    while (isspace(*p)) p++;
974    if (*p == 0) continue;    if (*p == 0) continue;
975    
976    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
977    complete, read more. */  
978      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
979        {
980        unsigned long int magic, get_options;
981        uschar sbuf[8];
982        FILE *f;
983    
984        p++;
985        pp = p + (int)strlen((char *)p);
986        while (isspace(pp[-1])) pp--;
987        *pp = 0;
988    
989        f = fopen((char *)p, "rb");
990        if (f == NULL)
991          {
992          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
993          continue;
994          }
995    
996        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
997    
998        true_size =
999          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1000        true_study_size =
1001          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1002    
1003        re = (real_pcre *)new_malloc(true_size);
1004        regex_gotten_store = gotten_store;
1005    
1006        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1007    
1008        magic = ((real_pcre *)re)->magic_number;
1009        if (magic != MAGIC_NUMBER)
1010          {
1011          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1012            {
1013            do_flip = 1;
1014            }
1015          else
1016            {
1017            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1018            fclose(f);
1019            continue;
1020            }
1021          }
1022    
1023        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1024          do_flip? " (byte-inverted)" : "", p);
1025    
1026        /* Need to know if UTF-8 for printing data strings */
1027    
1028        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1029        use_utf8 = (get_options & PCRE_UTF8) != 0;
1030    
1031        /* Now see if there is any following study data */
1032    
1033        if (true_study_size != 0)
1034          {
1035          pcre_study_data *psd;
1036    
1037          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1038          extra->flags = PCRE_EXTRA_STUDY_DATA;
1039    
1040          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1041          extra->study_data = psd;
1042    
1043          if (fread(psd, 1, true_study_size, f) != true_study_size)
1044            {
1045            FAIL_READ:
1046            fprintf(outfile, "Failed to read data from %s\n", p);
1047            if (extra != NULL) new_free(extra);
1048            if (re != NULL) new_free(re);
1049            fclose(f);
1050            continue;
1051            }
1052          fprintf(outfile, "Study data loaded from %s\n", p);
1053          do_study = 1;     /* To get the data output if requested */
1054          }
1055        else fprintf(outfile, "No study data\n");
1056    
1057        fclose(f);
1058        goto SHOW_INFO;
1059        }
1060    
1061      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1062      the pattern; if it isn't complete, read more. */
1063    
1064    delimiter = *p++;    delimiter = *p++;
1065    
# Line 571  while (!done) Line 1070  while (!done)
1070      }      }
1071    
1072    pp = p;    pp = p;
1073      poffset = p - buffer;
1074    
1075    for(;;)    for(;;)
1076      {      {
# Line 581  while (!done) Line 1081  while (!done)
1081        pp++;        pp++;
1082        }        }
1083      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1084      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1085      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1086        {        {
1087        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1088        done = 1;        done = 1;
# Line 599  while (!done) Line 1091  while (!done)
1091      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1092      }      }
1093    
1094      /* The buffer may have moved while being extended; reset the start of data
1095      pointer to the correct relative point in the buffer. */
1096    
1097      p = buffer + poffset;
1098    
1099    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1100    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1101    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1102    
1103    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1104    
1105    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1106      for callouts. */
1107    
1108    *pp++ = 0;    *pp++ = 0;
1109      strcpy((char *)pbuffer, (char *)p);
1110    
1111    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1112    
# Line 619  while (!done) Line 1118  while (!done)
1118      {      {
1119      switch (*pp++)      switch (*pp++)
1120        {        {
1121          case 'f': options |= PCRE_FIRSTLINE; break;
1122        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1123        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1124        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 627  while (!done) Line 1127  while (!done)
1127    
1128        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1129        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1130          case 'B': do_debug = 1; break;
1131          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1132        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1133        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1134          case 'F': do_flip = 1; break;
1135        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1136        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1137          case 'J': options |= PCRE_DUPNAMES; break;
1138        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1139          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1140    
1141  #if !defined NOPOSIX  #if !defined NOPOSIX
1142        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 640  while (!done) Line 1145  while (!done)
1145        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1146        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1147        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1148        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case 'Z': debug_lengths = 0; break;
1149          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1150          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1151    
1152        case 'L':        case 'L':
1153        ppp = pp;        ppp = pp;
1154        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1155          /* The '0' test is just in case this is an unterminated line. */
1156          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1157        *ppp = 0;        *ppp = 0;
1158        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1159          {          {
1160          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1161          goto SKIP_DATA;          goto SKIP_DATA;
1162            }
1163          locale_set = 1;
1164          tables = pcre_maketables();
1165          pp = ppp;
1166          break;
1167    
1168          case '>':
1169          to_file = pp;
1170          while (*pp != 0) pp++;
1171          while (isspace(pp[-1])) pp--;
1172          *pp = 0;
1173          break;
1174    
1175          case '<':
1176            {
1177            int x = check_newline(pp, outfile);
1178            if (x == 0) goto SKIP_DATA;
1179            options |= x;
1180            while (*pp++ != '>');
1181          }          }
       tables = pcre_maketables();  
       pp = ppp;  
1182        break;        break;
1183    
1184        case '\n': case ' ': break;        case '\r':                      /* So that it works in Windows */
1185          case '\n':
1186          case ' ':
1187          break;
1188    
1189        default:        default:
1190        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1191        goto SKIP_DATA;        goto SKIP_DATA;
# Line 671  while (!done) Line 1201  while (!done)
1201      {      {
1202      int rc;      int rc;
1203      int cflags = 0;      int cflags = 0;
1204    
1205      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1206      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1207        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1208        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1209        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1210    
1211      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1212    
1213      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 680  while (!done) Line 1215  while (!done)
1215    
1216      if (rc != 0)      if (rc != 0)
1217        {        {
1218        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1219        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1220        goto SKIP_DATA;        goto SKIP_DATA;
1221        }        }
# Line 692  while (!done) Line 1227  while (!done)
1227  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1228    
1229      {      {
1230      if (timeit)      if (timeit > 0)
1231        {        {
1232        register int i;        register int i;
1233        clock_t time_taken;        clock_t time_taken;
1234        clock_t start_time = clock();        clock_t start_time = clock();
1235        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1236          {          {
1237          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1238          if (re != NULL) free(re);          if (re != NULL) free(re);
1239          }          }
1240        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1241        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1242          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1243          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1244        }        }
1245    
1246      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 721  while (!done) Line 1256  while (!done)
1256          {          {
1257          for (;;)          for (;;)
1258            {            {
1259            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1260              {              {
1261              done = 1;              done = 1;
1262              goto CONTINUE;              goto CONTINUE;
# Line 739  while (!done) Line 1274  while (!done)
1274      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
1275      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
1276    
1277        if (log_store)
1278          fprintf(outfile, "Memory allocation (code space): %d\n",
1279            (int)(gotten_store -
1280                  sizeof(real_pcre) -
1281                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1282    
1283        /* Extract the size for possible writing before possibly flipping it,
1284        and remember the store that was got. */
1285    
1286        true_size = ((real_pcre *)re)->size;
1287        regex_gotten_store = gotten_store;
1288    
1289        /* If /S was present, study the regexp to generate additional info to
1290        help with the matching. */
1291    
1292        if (do_study)
1293          {
1294          if (timeit > 0)
1295            {
1296            register int i;
1297            clock_t time_taken;
1298            clock_t start_time = clock();
1299            for (i = 0; i < timeit; i++)
1300              extra = pcre_study(re, study_options, &error);
1301            time_taken = clock() - start_time;
1302            if (extra != NULL) free(extra);
1303            fprintf(outfile, "  Study time %.4f milliseconds\n",
1304              (((double)time_taken * 1000.0) / (double)timeit) /
1305                (double)CLOCKS_PER_SEC);
1306            }
1307          extra = pcre_study(re, study_options, &error);
1308          if (error != NULL)
1309            fprintf(outfile, "Failed to study: %s\n", error);
1310          else if (extra != NULL)
1311            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1312          }
1313    
1314        /* If the 'F' option was present, we flip the bytes of all the integer
1315        fields in the regex data block and the study block. This is to make it
1316        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1317        compiled on a different architecture. */
1318    
1319        if (do_flip)
1320          {
1321          real_pcre *rre = (real_pcre *)re;
1322          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1323          rre->size = byteflip(rre->size, sizeof(rre->size));
1324          rre->options = byteflip(rre->options, sizeof(rre->options));
1325          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1326          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1327          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1328          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1329          rre->name_table_offset = byteflip(rre->name_table_offset,
1330            sizeof(rre->name_table_offset));
1331          rre->name_entry_size = byteflip(rre->name_entry_size,
1332            sizeof(rre->name_entry_size));
1333          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1334    
1335          if (extra != NULL)
1336            {
1337            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1338            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1339            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1340            }
1341          }
1342    
1343        /* Extract information from the compiled data if required */
1344    
1345        SHOW_INFO:
1346    
1347        if (do_debug)
1348          {
1349          fprintf(outfile, "------------------------------------------------------------------\n");
1350          pcre_printint(re, outfile, debug_lengths);
1351          }
1352    
1353      if (do_showinfo)      if (do_showinfo)
1354        {        {
1355        unsigned long int get_options;        unsigned long int get_options, all_options;
1356    #if !defined NOINFOCHECK
1357        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1358        int count, backrefmax, first_char, need_char;  #endif
1359        size_t size;        int count, backrefmax, first_char, need_char, okpartial, jchanged;
1360          int nameentrysize, namecount;
1361        if (do_debug) print_internals(re);        const uschar *nametable;
1362    
1363        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1364        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1365        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1366        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1367        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1368        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1369          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1370          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1371          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1372          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1373          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1374    
1375    #if !defined NOINFOCHECK
1376        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1377        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1378          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 772  while (!done) Line 1390  while (!done)
1390            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1391              get_options, old_options);              get_options, old_options);
1392          }          }
1393    #endif
1394    
1395        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1396          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1397          size, gotten_store);          (int)size, (int)regex_gotten_store);
1398    
1399        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1400        if (backrefmax > 0)        if (backrefmax > 0)
1401          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
1402    
1403          if (namecount > 0)
1404            {
1405            fprintf(outfile, "Named capturing subpatterns:\n");
1406            while (namecount-- > 0)
1407              {
1408              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1409                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1410                GET2(nametable, 0));
1411              nametable += nameentrysize;
1412              }
1413            }
1414    
1415          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1416    
1417          all_options = ((real_pcre *)re)->options;
1418          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1419    
1420        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1421          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1422            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1423            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1424            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1425            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1426              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1427            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1428            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1429            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1430            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1431            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1432              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1433              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1434              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1435    
1436          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1437    
1438          switch (get_options & PCRE_NEWLINE_BITS)
1439            {
1440            case PCRE_NEWLINE_CR:
1441            fprintf(outfile, "Forced newline sequence: CR\n");
1442            break;
1443    
1444            case PCRE_NEWLINE_LF:
1445            fprintf(outfile, "Forced newline sequence: LF\n");
1446            break;
1447    
1448        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_CRLF:
1449          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1450            break;
1451    
1452            case PCRE_NEWLINE_ANYCRLF:
1453            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1454            break;
1455    
1456            case PCRE_NEWLINE_ANY:
1457            fprintf(outfile, "Forced newline sequence: ANY\n");
1458            break;
1459    
1460            default:
1461            break;
1462            }
1463    
1464        if (first_char == -1)        if (first_char == -1)
1465          {          {
1466          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1467          }          }
1468        else if (first_char < 0)        else if (first_char < 0)
1469          {          {
# Line 805  while (!done) Line 1471  while (!done)
1471          }          }
1472        else        else
1473          {          {
1474          if (isprint(first_char))          int ch = first_char & 255;
1475            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1476              "" : " (caseless)";
1477            if (PRINTHEX(ch))
1478              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1479          else          else
1480            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1481          }          }
1482    
1483        if (need_char < 0)        if (need_char < 0)
# Line 817  while (!done) Line 1486  while (!done)
1486          }          }
1487        else        else
1488          {          {
1489          if (isprint(need_char))          int ch = need_char & 255;
1490            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1491              "" : " (caseless)";
1492            if (PRINTHEX(ch))
1493              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1494          else          else
1495            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1496          }          }
       }  
1497    
1498      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1499      help with the matching. */        value, but it varies, depending on the computer architecture, and
1500          so messes up the test suite. (And with the /F option, it might be
1501          flipped.) */
1502    
1503      if (do_study)        if (do_study)
       {  
       if (timeit)  
1504          {          {
1505          register int i;          if (extra == NULL)
1506          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1507          clock_t start_time = clock();          else
1508          for (i = 0; i < LOOPREPEAT; i++)            {
1509            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1510          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1511          if (extra != NULL) free(extra);  
1512          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1513            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1514            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1515                {
1516                int i;
1517                int c = 24;
1518                fprintf(outfile, "Starting byte set: ");
1519                for (i = 0; i < 256; i++)
1520                  {
1521                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1522                    {
1523                    if (c > 75)
1524                      {
1525                      fprintf(outfile, "\n  ");
1526                      c = 2;
1527                      }
1528                    if (PRINTHEX(i) && i != ' ')
1529                      {
1530                      fprintf(outfile, "%c ", i);
1531                      c += 2;
1532                      }
1533                    else
1534                      {
1535                      fprintf(outfile, "\\x%02x ", i);
1536                      c += 5;
1537                      }
1538                    }
1539                  }
1540                fprintf(outfile, "\n");
1541                }
1542              }
1543          }          }
1544          }
1545    
1546        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1547        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1548          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1549    
1550        else if (do_showinfo)      if (to_file != NULL)
1551          {
1552          FILE *f = fopen((char *)to_file, "wb");
1553          if (f == NULL)
1554            {
1555            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1556            }
1557          else
1558          {          {
1559          uschar *start_bits = NULL;          uschar sbuf[8];
1560          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (true_size >> 24)  & 255;
1561          if (start_bits == NULL)          sbuf[1] = (true_size >> 16)  & 255;
1562            fprintf(outfile, "No starting character set\n");          sbuf[2] = (true_size >>  8)  & 255;
1563            sbuf[3] = (true_size)  & 255;
1564    
1565            sbuf[4] = (true_study_size >> 24)  & 255;
1566            sbuf[5] = (true_study_size >> 16)  & 255;
1567            sbuf[6] = (true_study_size >>  8)  & 255;
1568            sbuf[7] = (true_study_size)  & 255;
1569    
1570            if (fwrite(sbuf, 1, 8, f) < 8 ||
1571                fwrite(re, 1, true_size, f) < true_size)
1572              {
1573              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1574              }
1575          else          else
1576            {            {
1577            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1578            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1579              {              {
1580              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1581                    true_study_size)
1582                {                {
1583                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1584                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1585                }                }
1586                else fprintf(outfile, "Study data written to %s\n", to_file);
1587    
1588              }              }
           fprintf(outfile, "\n");  
1589            }            }
1590            fclose(f);
1591          }          }
1592    
1593          new_free(re);
1594          if (extra != NULL) new_free(extra);
1595          if (tables != NULL) new_free((void *)tables);
1596          continue;  /* With next regex */
1597        }        }
1598      }      }        /* End of non-POSIX compile */
1599    
1600    /* Read data lines and test them */    /* Read data lines and test them */
1601    
1602    for (;;)    for (;;)
1603      {      {
1604      unsigned char *q;      uschar *q;
1605      unsigned char *bptr = dbuffer;      uschar *bptr;
1606        int *use_offsets = offsets;
1607      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1608        int callout_data = 0;
1609        int callout_data_set = 0;
1610      int count, c;      int count, c;
1611      int copystrings = 0;      int copystrings = 0;
1612        int find_match_limit = 0;
1613      int getstrings = 0;      int getstrings = 0;
1614      int getlist = 0;      int getlist = 0;
1615      int gmatched = 0;      int gmatched = 0;
1616      int start_offset = 0;      int start_offset = 0;
1617      int g_notempty = 0;      int g_notempty = 0;
1618        int use_dfa = 0;
1619    
1620      options = 0;      options = 0;
1621    
1622      if (infile == stdin) printf("data> ");      *copynames = 0;
1623      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1624    
1625        copynamesptr = copynames;
1626        getnamesptr = getnames;
1627    
1628        pcre_callout = callout;
1629        first_callout = 1;
1630        callout_extra = 0;
1631        callout_count = 0;
1632        callout_fail_count = 999999;
1633        callout_fail_id = -1;
1634        show_malloc = 0;
1635    
1636        if (extra != NULL) extra->flags &=
1637          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1638    
1639        len = 0;
1640        for (;;)
1641        {        {
1642        done = 1;        if (infile == stdin) printf("data> ");
1643        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1644            {
1645            if (len > 0) break;
1646            done = 1;
1647            goto CONTINUE;
1648            }
1649          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1650          len = (int)strlen((char *)buffer);
1651          if (buffer[len-1] == '\n') break;
1652        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1653    
     len = (int)strlen((char *)buffer);  
1654      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1655      buffer[len] = 0;      buffer[len] = 0;
1656      if (len == 0) break;      if (len == 0) break;
# Line 920  while (!done) Line 1658  while (!done)
1658      p = buffer;      p = buffer;
1659      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1660    
1661      q = dbuffer;      bptr = q = dbuffer;
1662      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1663        {        {
1664        int i = 0;        int i = 0;
1665        int n = 0;        int n = 0;
1666    
1667        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1668          {          {
1669          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 941  while (!done) Line 1680  while (!done)
1680          c -= '0';          c -= '0';
1681          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1682            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1683    
1684    #if !defined NOUTF8
1685            if (use_utf8 && c > 255)
1686              {
1687              unsigned char buff8[8];
1688              int ii, utn;
1689              utn = ord2utf8(c, buff8);
1690              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1691              c = buff8[ii];   /* Last byte */
1692              }
1693    #endif
1694          break;          break;
1695    
1696          case 'x':          case 'x':
1697    
1698          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1699    
1700    #if !defined NOUTF8
1701          if (*p == '{')          if (*p == '{')
1702            {            {
1703            unsigned char *pt = p;            unsigned char *pt = p;
# Line 955  while (!done) Line 1706  while (!done)
1706              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1707            if (*pt == '}')            if (*pt == '}')
1708              {              {
1709              unsigned char buffer[8];              unsigned char buff8[8];
1710              int ii, utn;              int ii, utn;
1711              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
1712              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1713              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1714              p = pt + 1;              p = pt + 1;
1715              break;              break;
1716              }              }
1717            /* Not correct form; fall through */            /* Not correct form; fall through */
1718            }            }
1719    #endif
1720    
1721          /* Ordinary \x */          /* Ordinary \x */
1722    
# Line 976  while (!done) Line 1728  while (!done)
1728            }            }
1729          break;          break;
1730    
1731          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1732          p--;          p--;
1733          continue;          continue;
1734    
1735            case '>':
1736            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1737            continue;
1738    
1739          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1740          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1741          continue;          continue;
# Line 989  while (!done) Line 1745  while (!done)
1745          continue;          continue;
1746    
1747          case 'C':          case 'C':
1748          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1749          copystrings |= 1 << n;            {
1750              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1751              copystrings |= 1 << n;
1752              }
1753            else if (isalnum(*p))
1754              {
1755              uschar *npp = copynamesptr;
1756              while (isalnum(*p)) *npp++ = *p++;
1757              *npp++ = 0;
1758              *npp = 0;
1759              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1760              if (n < 0)
1761                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1762              copynamesptr = npp;
1763              }
1764            else if (*p == '+')
1765              {
1766              callout_extra = 1;
1767              p++;
1768              }
1769            else if (*p == '-')
1770              {
1771              pcre_callout = NULL;
1772              p++;
1773              }
1774            else if (*p == '!')
1775              {
1776              callout_fail_id = 0;
1777              p++;
1778              while(isdigit(*p))
1779                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1780              callout_fail_count = 0;
1781              if (*p == '!')
1782                {
1783                p++;
1784                while(isdigit(*p))
1785                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1786                }
1787              }
1788            else if (*p == '*')
1789              {
1790              int sign = 1;
1791              callout_data = 0;
1792              if (*(++p) == '-') { sign = -1; p++; }
1793              while(isdigit(*p))
1794                callout_data = callout_data * 10 + *p++ - '0';
1795              callout_data *= sign;
1796              callout_data_set = 1;
1797              }
1798            continue;
1799    
1800    #if !defined NODFA
1801            case 'D':
1802    #if !defined NOPOSIX
1803            if (posix || do_posix)
1804              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1805            else
1806    #endif
1807              use_dfa = 1;
1808            continue;
1809    
1810            case 'F':
1811            options |= PCRE_DFA_SHORTEST;
1812          continue;          continue;
1813    #endif
1814    
1815          case 'G':          case 'G':
1816          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1817          getstrings |= 1 << n;            {
1818              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1819              getstrings |= 1 << n;
1820              }
1821            else if (isalnum(*p))
1822              {
1823              uschar *npp = getnamesptr;
1824              while (isalnum(*p)) *npp++ = *p++;
1825              *npp++ = 0;
1826              *npp = 0;
1827              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1828              if (n < 0)
1829                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1830              getnamesptr = npp;
1831              }
1832          continue;          continue;
1833    
1834          case 'L':          case 'L':
1835          getlist = 1;          getlist = 1;
1836          continue;          continue;
1837    
1838            case 'M':
1839            find_match_limit = 1;
1840            continue;
1841    
1842          case 'N':          case 'N':
1843          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1844          continue;          continue;
# Line 1010  while (!done) Line 1847  while (!done)
1847          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1848          if (n > size_offsets_max)          if (n > size_offsets_max)
1849            {            {
   
 if (offsets != NULL)  
   
           free(offsets);  
1850            size_offsets_max = n;            size_offsets_max = n;
1851            offsets = malloc(size_offsets_max * sizeof(int));            free(offsets);
1852              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1853            if (offsets == NULL)            if (offsets == NULL)
1854              {              {
1855              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1856                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1857              return 1;              yield = 1;
1858                goto EXIT;
1859              }              }
1860            }            }
1861          use_size_offsets = n;          use_size_offsets = n;
1862            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1863            continue;
1864    
1865  if (n == 0)          case 'P':
1866    {          options |= PCRE_PARTIAL;
1867    free(offsets);          continue;
1868    offsets = NULL;  
1869    size_offsets_max = 0;          case 'Q':
1870    }          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871            if (extra == NULL)
1872              {
1873              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874              extra->flags = 0;
1875              }
1876            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1877            extra->match_limit_recursion = n;
1878            continue;
1879    
1880            case 'q':
1881            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1882            if (extra == NULL)
1883              {
1884              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1885              extra->flags = 0;
1886              }
1887            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1888            extra->match_limit = n;
1889            continue;
1890    
1891    #if !defined NODFA
1892            case 'R':
1893            options |= PCRE_DFA_RESTART;
1894            continue;
1895    #endif
1896    
1897            case 'S':
1898            show_malloc = 1;
1899          continue;          continue;
1900    
1901          case 'Z':          case 'Z':
1902          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1903          continue;          continue;
1904    
1905            case '?':
1906            options |= PCRE_NO_UTF8_CHECK;
1907            continue;
1908    
1909            case '<':
1910              {
1911              int x = check_newline(p, outfile);
1912              if (x == 0) goto NEXT_DATA;
1913              options |= x;
1914              while (*p++ != '>');
1915              }
1916            continue;
1917          }          }
1918        *q++ = c;        *q++ = c;
1919        }        }
1920      *q = 0;      *q = 0;
1921      len = q - dbuffer;      len = q - dbuffer;
1922    
1923        if ((all_use_dfa || use_dfa) && find_match_limit)
1924          {
1925          printf("**Match limit not relevant for DFA matching: ignored\n");
1926          find_match_limit = 0;
1927          }
1928    
1929      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1930      support timing. */      support timing or playing with the match limit or callout data. */
1931    
1932  #if !defined NOPOSIX  #if !defined NOPOSIX
1933      if (posix || do_posix)      if (posix || do_posix)
1934        {        {
1935        int rc;        int rc;
1936        int eflags = 0;        int eflags = 0;
1937        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1938          if (use_size_offsets > 0)
1939            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1940        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1941        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1942    
# Line 1059  if (n == 0) Line 1944  if (n == 0)
1944    
1945        if (rc != 0)        if (rc != 0)
1946          {          {
1947          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1948          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1949          }          }
1950          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1951                  != 0)
1952            {
1953            fprintf(outfile, "Matched with REG_NOSUB\n");
1954            }
1955        else        else
1956          {          {
1957          size_t i;          size_t i;
1958          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1959            {            {
1960            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1961              {              {
1962              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1963              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1964                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1965              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1966              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1967                {                {
1968                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1969                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1970                    outfile);
1971                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1972                }                }
1973              }              }
# Line 1092  if (n == 0) Line 1983  if (n == 0)
1983    
1984      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1985        {        {
1986        if (timeit)        if (timeitm > 0)
1987          {          {
1988          register int i;          register int i;
1989          clock_t time_taken;          clock_t time_taken;
1990          clock_t start_time = clock();          clock_t start_time = clock();
1991          for (i = 0; i < LOOPREPEAT; i++)  
1992    #if !defined NODFA
1993            if (all_use_dfa || use_dfa)
1994              {
1995              int workspace[1000];
1996              for (i = 0; i < timeitm; i++)
1997                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1998                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1999                  sizeof(workspace)/sizeof(int));
2000              }
2001            else
2002    #endif
2003    
2004            for (i = 0; i < timeitm; i++)
2005            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2006              start_offset, options | g_notempty, offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2007    
2008          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2009          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2010            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2011            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2012            }
2013    
2014          /* If find_match_limit is set, we want to do repeated matches with
2015          varying limits in order to find the minimum value for the match limit and
2016          for the recursion limit. */
2017    
2018          if (find_match_limit)
2019            {
2020            if (extra == NULL)
2021              {
2022              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2023              extra->flags = 0;
2024              }
2025    
2026            (void)check_match_limit(re, extra, bptr, len, start_offset,
2027              options|g_notempty, use_offsets, use_size_offsets,
2028              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2029              PCRE_ERROR_MATCHLIMIT, "match()");
2030    
2031            count = check_match_limit(re, extra, bptr, len, start_offset,
2032              options|g_notempty, use_offsets, use_size_offsets,
2033              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2034              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2035          }          }
2036    
2037        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
2038          start_offset, options | g_notempty, offsets, use_size_offsets);  
2039          else if (callout_data_set)
2040            {
2041            if (extra == NULL)
2042              {
2043              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2044              extra->flags = 0;
2045              }
2046            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2047            extra->callout_data = &callout_data;
2048            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2049              options | g_notempty, use_offsets, use_size_offsets);
2050            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2051            }
2052    
2053          /* The normal case is just to do the match once, with the default
2054          value of match_limit. */
2055    
2056    #if !defined NODFA
2057          else if (all_use_dfa || use_dfa)
2058            {
2059            int workspace[1000];
2060            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2061              options | g_notempty, use_offsets, use_size_offsets, workspace,
2062              sizeof(workspace)/sizeof(int));
2063            if (count == 0)
2064              {
2065              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2066              count = use_size_offsets/2;
2067              }
2068            }
2069    #endif
2070    
2071        if (count == 0)        else
2072          {          {
2073          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2074          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2075            if (count == 0)
2076              {
2077              fprintf(outfile, "Matched, but too many substrings\n");
2078              count = use_size_offsets/3;
2079              }
2080          }          }
2081    
2082        /* Matched */        /* Matched */
2083    
2084        if (count >= 0)        if (count >= 0)
2085          {          {
2086          int i;          int i, maxcount;
2087    
2088    #if !defined NODFA
2089            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2090    #endif
2091              maxcount = use_size_offsets/3;
2092    
2093            /* This is a check against a lunatic return value. */
2094    
2095            if (count > maxcount)
2096              {
2097              fprintf(outfile,
2098                "** PCRE error: returned count %d is too big for offset size %d\n",
2099                count, use_size_offsets);
2100              count = use_size_offsets/3;
2101              if (do_g || do_G)
2102                {
2103                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2104                do_g = do_G = FALSE;        /* Break g/G loop */
2105                }
2106              }
2107    
2108          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2109            {            {
2110            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2111              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2112            else            else
2113              {              {
2114              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2115              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
2116                  use_offsets[i+1] - use_offsets[i], outfile);
2117              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2118              if (i == 0)              if (i == 0)
2119                {                {
2120                if (do_showrest)                if (do_showrest)
2121                  {                  {
2122                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
2123                  pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2124                      outfile);
2125                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
2126                  }                  }
2127                }                }
# Line 1145  if (n == 0) Line 2132  if (n == 0)
2132            {            {
2133            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2134              {              {
2135              char copybuffer[16];              char copybuffer[256];
2136              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2137                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2138              if (rc < 0)              if (rc < 0)
2139                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 1155  if (n == 0) Line 2142  if (n == 0)
2142              }              }
2143            }            }
2144    
2145            for (copynamesptr = copynames;
2146                 *copynamesptr != 0;
2147                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2148              {
2149              char copybuffer[256];
2150              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2151                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2152              if (rc < 0)
2153                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2154              else
2155                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2156              }
2157    
2158          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2159            {            {
2160            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2161              {              {
2162              const char *substring;              const char *substring;
2163              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2164                i, &substring);                i, &substring);
2165              if (rc < 0)              if (rc < 0)
2166                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2167              else              else
2168                {                {
2169                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2170                pcre_free_substring(substring);                pcre_free_substring(substring);
2171                }                }
2172              }              }
2173            }            }
2174    
2175            for (getnamesptr = getnames;
2176                 *getnamesptr != 0;
2177                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2178              {
2179              const char *substring;
2180              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2181                count, (char *)getnamesptr, &substring);
2182              if (rc < 0)
2183                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2184              else
2185                {
2186                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2187                pcre_free_substring(substring);
2188                }
2189              }
2190    
2191          if (getlist)          if (getlist)
2192            {            {
2193            const char **stringlist;            const char **stringlist;
2194            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2195              &stringlist);              &stringlist);
2196            if (rc < 0)            if (rc < 0)
2197              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 1192  if (n == 0) Line 2207  if (n == 0)
2207            }            }
2208          }          }
2209    
2210          /* There was a partial match */
2211    
2212          else if (count == PCRE_ERROR_PARTIAL)
2213            {
2214            fprintf(outfile, "Partial match");
2215    #if !defined NODFA
2216            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2217              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2218                bptr + use_offsets[0]);
2219    #endif
2220            fprintf(outfile, "\n");
2221            break;  /* Out of the /g loop */
2222            }
2223    
2224        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2225        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2226        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2227        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2228        was checked before setting g_notempty. */  
2229          Complication arises in the case when the newline option is "any" or
2230          "anycrlf". If the previous match was at the end of a line terminated by
2231          CRLF, an advance of one character just passes the \r, whereas we should
2232          prefer the longer newline sequence, as does the code in pcre_exec().
2233          Fudge the offset value to achieve this.
2234    
2235          Otherwise, in the case of UTF-8 matching, the advance must be one
2236          character, not one byte. */
2237    
2238        else        else
2239          {          {
2240          if (g_notempty != 0)          if (g_notempty != 0)
2241            {            {
2242            offsets[0] = start_offset;            int onechar = 1;
2243            offsets[1] = start_offset + 1;            unsigned int obits = ((real_pcre *)re)->options;
2244              use_offsets[0] = start_offset;
2245              if ((obits & PCRE_NEWLINE_BITS) == 0)
2246                {
2247                int d;
2248                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2249                obits = (d == '\r')? PCRE_NEWLINE_CR :
2250                        (d == '\n')? PCRE_NEWLINE_LF :
2251                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2252                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2253                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2254                }
2255              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2256                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2257                  &&
2258                  start_offset < len - 1 &&
2259                  bptr[start_offset] == '\r' &&
2260                  bptr[start_offset+1] == '\n')
2261                onechar++;
2262              else if (use_utf8)
2263                {
2264                while (start_offset + onechar < len)
2265                  {
2266                  int tb = bptr[start_offset+onechar];
2267                  if (tb <= 127) break;
2268                  tb &= 0xc0;
2269                  if (tb != 0 && tb != 0xc0) onechar++;
2270                  }
2271                }
2272              use_offsets[1] = start_offset + onechar;
2273            }            }
2274          else          else
2275            {            {
2276            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2277              {              {
2278              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2279              }              }
2280              else fprintf(outfile, "Error %d\n", count);
2281            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2282            }            }
2283          }          }
# Line 1228  if (n == 0) Line 2294  if (n == 0)
2294        character. */        character. */
2295    
2296        g_notempty = 0;        g_notempty = 0;
2297        if (offsets[0] == offsets[1])  
2298          if (use_offsets[0] == use_offsets[1])
2299          {          {
2300          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
2301          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2302          }          }
2303    
2304        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
2305    
2306        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
2307    
2308        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
2309    
2310        else        else
2311          {          {
2312          bptr += offsets[1];          bptr += use_offsets[1];
2313          len -= offsets[1];          len -= use_offsets[1];
2314          }          }
2315        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2316    
2317        NEXT_DATA: continue;
2318      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2319    
2320    CONTINUE:    CONTINUE:
# Line 1254  if (n == 0) Line 2323  if (n == 0)
2323    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2324  #endif  #endif
2325    
2326    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2327    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2328    if (tables != NULL)    if (tables != NULL)
2329      {      {
2330      free((void *)tables);      new_free((void *)tables);
2331      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2332        locale_set = 0;
2333      }      }
2334    }    }
2335    
2336  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2337  return 0;  
2338    EXIT:
2339    
2340    if (infile != NULL && infile != stdin) fclose(infile);
2341    if (outfile != NULL && outfile != stdout) fclose(outfile);
2342    
2343    free(buffer);
2344    free(dbuffer);
2345    free(pbuffer);
2346    free(offsets);
2347    
2348    return yield;
2349  }  }
2350    
2351  /* End */  /* End of pcretest.c */

Legend:
Removed from v.55  
changed lines
  Added in v.199

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12