/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 59 by nigel, Sat Feb 24 21:39:54 2007 UTC revision 151 by ph10, Tue Apr 17 15:07:29 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    
48    /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
69    
70    /* We have to include pcre_internal.h because we need the internal info for
71    displaying the results of pcre_study() and we also need to know about the
72    internal macros, structures, and other internal data values; pcretest has
73    "inside information" compared to a program that strictly follows the PCRE API.
74    
75    Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80    #include "pcre_internal.h"
81    
82    /* We need access to the data tables that PCRE uses. So as not to have to keep
83    two copies, we include the source file here, changing the names of the external
84    symbols to prevent clashes. */
85    
86    #define _pcre_utf8_table1      utf8_table1
87    #define _pcre_utf8_table1_size utf8_table1_size
88    #define _pcre_utf8_table2      utf8_table2
89    #define _pcre_utf8_table3      utf8_table3
90    #define _pcre_utf8_table4      utf8_table4
91    #define _pcre_utt              utt
92    #define _pcre_utt_size         utt_size
93    #define _pcre_OP_lengths       OP_lengths
94    
95    #include "pcre_tables.c"
96    
97    /* We also need the pcre_printint() function for printing out compiled
98    patterns. This function is in a separate file so that it can be included in
99    pcre_compile.c when that module is compiled with debugging enabled.
100    
101    The definition of the macro PRINTABLE, which determines whether to print an
102    output character as-is or as a hex value when showing compiled patterns, is
103    contained in this file. We use it here also, in cases when the locale has not
104    been explicitly changed, so as to get consistent output from systems that
105    differ in their output from isprint() even in the "C" locale. */
106    
107  /* Use the internal info for displaying the results of pcre_study(). */  #include "pcre_printint.src"
108    
109    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110    
 #include "internal.h"  
111    
112  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
113  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 117  Makefile. */
117  #include "pcreposix.h"  #include "pcreposix.h"
118  #endif  #endif
119    
120    /* It is also possible, for the benefit of the version currently imported into
121    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122    interface to the DFA matcher (NODFA), and without the doublecheck of the old
123    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124    UTF8 support if PCRE is built without it. */
125    
126    #ifndef SUPPORT_UTF8
127    #ifndef NOUTF8
128    #define NOUTF8
129    #endif
130    #endif
131    
132    
133    /* Other parameters */
134    
135  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
136  #ifdef CLK_TCK  #ifdef CLK_TCK
137  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 140  Makefile. */
140  #endif  #endif
141  #endif  #endif
142    
143  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
144    
145    #define LOOPREPEAT 500000
146    
147    /* Static variables */
148    
149  static FILE *outfile;  static FILE *outfile;
150  static int log_store = 0;  static int log_store = 0;
151    static int callout_count;
152    static int callout_extra;
153    static int callout_fail_count;
154    static int callout_fail_id;
155    static int first_callout;
156    static int locale_set = 0;
157    static int show_malloc;
158    static int use_utf8;
159  static size_t gotten_store;  static size_t gotten_store;
160    
161    /* The buffers grow automatically if very long input lines are encountered. */
162    
163    static int buffer_size = 50000;
164    static uschar *buffer = NULL;
165    static uschar *dbuffer = NULL;
166    static uschar *pbuffer = NULL;
167    
168    
169    
170    /*************************************************
171    *        Read or extend an input line            *
172    *************************************************/
173    
174    /* Input lines are read into buffer, but both patterns and data lines can be
175    continued over multiple input lines. In addition, if the buffer fills up, we
176    want to automatically expand it so as to be able to handle extremely large
177    lines that are needed for certain stress tests. When the input buffer is
178    expanded, the other two buffers must also be expanded likewise, and the
179    contents of pbuffer, which are a copy of the input for callouts, must be
180    preserved (for when expansion happens for a data line). This is not the most
181    optimal way of handling this, but hey, this is just a test program!
182    
183    Arguments:
184      f            the file to read
185      start        where in buffer to start (this *must* be within buffer)
186    
187    Returns:       pointer to the start of new data
188                   could be a copy of start, or could be moved
189                   NULL if no data read and EOF reached
190    */
191    
192    static uschar *
193    extend_inputline(FILE *f, uschar *start)
194    {
195    uschar *here = start;   /* Where the next fgets() writes; moves forward as pieces of a long line arrive */
196    
197    for (;;)
198      {
199      int rlen = buffer_size - (here - buffer);   /* Bytes remaining in buffer from "here" to its end */
200    
201      if (rlen > 1000)   /* Enough room: read (more of) the line */
202        {
203        int dlen;
204        if (fgets((char *)here, rlen,  f) == NULL)
205          return (here == start)? NULL : start;   /* fgets() gave nothing: NULL only if no data at all was read by this call */
206        dlen = (int)strlen((char *)here);
207        if (dlen > 0 && here[dlen - 1] == '\n') return start;   /* A newline ends the piece: the line is complete */
208        here += dlen;   /* No newline yet: loop to append the next piece after this one */
209        }
210    
211      else   /* 1000 bytes or fewer remain: replace all three buffers with ones twice the size */
212        {
213        int new_buffer_size = 2*buffer_size;
214        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217    
218        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219          {
220          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221          exit(1);   /* Allocation failure is fatal; the program terminates here */
222          }
223    
224        memcpy(new_buffer, buffer, buffer_size);   /* Carry over the input read so far */
225        memcpy(new_pbuffer, pbuffer, buffer_size);   /* Carry over pbuffer as well; dbuffer's old contents are not copied */
226    
227        buffer_size = new_buffer_size;
228    
229        start = new_buffer + (start - buffer);   /* Rebase start and here to the same offsets in the new buffer, */
230        here = new_buffer + (here - buffer);   /* computed before the old buffer is freed below */
231    
232        free(buffer);
233        free(dbuffer);
234        free(pbuffer);
235    
236        buffer = new_buffer;
237        dbuffer = new_dbuffer;
238        pbuffer = new_pbuffer;
239        }
240      }
241    
242    return NULL;  /* Control never gets here */
243    }
244    
245    
246    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
247    
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
248    
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
249    
250    
251  /*************************************************  /*************************************************
252  *       Convert character value to UTF-8         *  *          Read number from string               *
253  *************************************************/  *************************************************/
254    
255  /* This function takes an integer value in the range 0 - 0x7fffffff  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256  and encodes it as a UTF-8 character in 0 to 6 bytes.  around with conditional compilation, just do the job by hand. It is only used
257    for unpicking arguments, so just keep it simple.
258    
259  Arguments:  Arguments:
260    cvalue     the character value    str           string to be converted
261    buffer     pointer to buffer for result - at least 6 bytes long    endptr        where to put the end pointer
262    
263  Returns:     number of characters placed in the buffer  Returns:        the converted value, as an int
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
264  */  */
265    
266  static int  static int
267  ord2utf8(int cvalue, unsigned char *buffer)  get_value(unsigned char *str, unsigned char **endptr)
268  {  {
269  register int i, j;  int result = 0;
270  for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  while(*str != 0 && isspace(*str)) str++;
271    if (cvalue <= utf8_table1[i]) break;  while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
272  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  *endptr = str;
273  if (cvalue < 0) return -1;  return(result);
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
274  }  }
275    
276    
277    
278    
279  /*************************************************  /*************************************************
280  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
281  *************************************************/  *************************************************/
# Line 93  return i + 1; Line 284  return i + 1;
284  and returns the value of the character.  and returns the value of the character.
285    
286  Argument:  Argument:
287    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
288    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
289    
290  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
291             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
292  */  */
293    
294  int  #if !defined NOUTF8
295  utf82ord(unsigned char *buffer, int *vptr)  
296    static int
297    utf82ord(unsigned char *utf8bytes, int *vptr)
298  {  {
299  int c = *buffer++;  int c = *utf8bytes++;
300  int d = c;  int d = c;
301  int i, j, s;  int i, j, s;
302    
# Line 123  d = (c & utf8_table3[i]) << s; Line 316  d = (c & utf8_table3[i]) << s;
316    
317  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
318    {    {
319    c = *buffer++;    c = *utf8bytes++;
320    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
321    s -= 6;    s -= 6;
322    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 131  for (j = 0; j < i; j++) Line 324  for (j = 0; j < i; j++)
324    
325  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
326    
327  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
328    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
329  if (j != i) return -(i+1);  if (j != i) return -(i+1);
330    
# Line 141  if (j != i) return -(i+1); Line 334  if (j != i) return -(i+1);
334  return i+1;  return i+1;
335  }  }
336    
337    #endif
338    
339    
340    
341    /*************************************************
342    *       Convert character value to UTF-8         *
343    *************************************************/
344    
345    /* This function takes an integer value in the range 0 - 0x7fffffff
346    and encodes it as a UTF-8 character in 1 to 6 bytes.
347    
348  /* Debugging function to print the internal form of the regex. This is the same  Arguments:
349  code as contained in pcre.c under the DEBUG macro. */    cvalue     the character value
350      utf8bytes  pointer to buffer for result - at least 6 bytes long
351    
352  static const char *OP_names[] = {  Returns:     number of characters placed in the buffer
353    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  */
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
354    
355    #if !defined NOUTF8
356    
357  static void print_internals(pcre *re)  static int
358    ord2utf8(int cvalue, uschar *utf8bytes)
359  {  {
360  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
361    for (i = 0; i < utf8_table1_size; i++)
362  fprintf(outfile, "------------------------------------------------------------------\n");    if (cvalue <= utf8_table1[i]) break;
363    utf8bytes += i;
364    for (j = i; j > 0; j--)
365     {
366     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367     cvalue >>= 6;
368     }
369    *utf8bytes = utf8_table2[i] | cvalue;
370    return i + 1;
371    }
372    
373  for(;;)  #endif
   {  
   int c;  
   int charlength;  
374    
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
375    
   if (*code >= OP_BRA)  
     {  
     if (*code - OP_BRA > EXTRACT_BASIC_MAX)  
       fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
     else  
       fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
376    
377    else switch(*code)  /*************************************************
378      {  *             Print character string             *
379      case OP_END:  *************************************************/
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
380    
381        CLASS_REF_REPEAT:  /* Character string printing function. Must handle UTF-8 strings in utf8
382    mode. Yields number of characters printed. If handed a NULL file, just counts
383    chars without printing. */
384    
385        switch(*code)  static int pchars(unsigned char *p, int length, FILE *f)
386          {  {
387          case OP_CRSTAR:  int c = 0;
388          case OP_CRMINSTAR:  int yield = 0;
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
389    
390          case OP_CRRANGE:  while (length-- > 0)
391          case OP_CRMINRANGE:    {
392          min = (code[1] << 8) + code[2];  #if !defined NOUTF8
393          max = (code[3] << 8) + code[4];    if (use_utf8)
394          if (max == 0) fprintf(outfile, "{%d,}", min);      {
395          else fprintf(outfile, "{%d,%d}", min, max);      int rc = utf82ord(p, &c);
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
396    
397          default:      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
398          code--;        {
399          length -= rc - 1;
400          p += rc;
401          if (PRINTHEX(c))
402            {
403            if (f != NULL) fprintf(f, "%c", c);
404            yield++;
405          }          }
406          else
407            {
408            int n = 4;
409            if (f != NULL) fprintf(f, "\\x{%02x}", c);
410            yield += (n <= 0x000000ff)? 2 :
411                     (n <= 0x00000fff)? 3 :
412                     (n <= 0x0000ffff)? 4 :
413                     (n <= 0x000fffff)? 5 : 6;
414            }
415          continue;
416        }        }
417      break;      }
418    #endif
419    
420      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
421    
422      default:    c = *p++;
423      fprintf(outfile, "    %s", OP_names[*code]);    if (PRINTHEX(c))
424      break;      {
425        if (f != NULL) fprintf(f, "%c", c);
426        yield++;
427        }
428      else
429        {
430        if (f != NULL) fprintf(f, "\\x%02x", c);
431        yield += 4;
432      }      }
   
   code++;  
   fprintf(outfile, "\n");  
433    }    }
434    
435    return yield;
436  }  }
437    
438    
439    
440  /* Character string printing function. A "normal" and a UTF-8 version. */  /*************************************************
441    *              Callout function                  *
442    *************************************************/
443    
444    /* Called from PCRE as a result of the (?C) item. We print out where we are in
445    the match. Yield zero unless more callouts than the fail count, or the callout
446    data is not zero. */
447    
448  static void pchars(unsigned char *p, int length, int utf8)  static int callout(pcre_callout_block *cb)
449  {  {
450  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
451  while (length-- > 0)  int i, pre_start, post_start, subject_length;
452    
453    if (callout_extra)
454    {    {
455    if (utf8)    fprintf(f, "Callout %d: last capture = %d\n",
456        cb->callout_number, cb->capture_last);
457    
458      for (i = 0; i < cb->capture_top * 2; i += 2)
459      {      {
460      int rc = utf82ord(p, &c);      if (cb->offset_vector[i] < 0)
461      if (rc > 0)        fprintf(f, "%2d: <unset>\n", i/2);
462        else
463        {        {
464        length -= rc - 1;        fprintf(f, "%2d: ", i/2);
465        p += rc;        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
466        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
467          else fprintf(outfile, "\\x{%02x}", c);        fprintf(f, "\n");
       continue;  
468        }        }
469      }      }
470      }
471    
472     /* Not UTF-8, or malformed UTF-8  */  /* Re-print the subject in canonical form, the first time or if giving full
473    details. On subsequent calls in the same match, we use pchars just to find the
474    printed lengths of the substrings. */
475    
476    if (f != NULL) fprintf(f, "--->");
477    
478    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
479    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
480      cb->current_position - cb->start_match, f);
481    
482    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
483      else fprintf(outfile, "\\x%02x", c);  
484    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
485      cb->subject_length - cb->current_position, f);
486    
487    if (f != NULL) fprintf(f, "\n");
488    
489    /* Always print appropriate indicators, with callout number if not already
490    shown. For automatic callouts, show the pattern offset. */
491    
492    if (cb->callout_number == 255)
493      {
494      fprintf(outfile, "%+3d ", cb->pattern_position);
495      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
496    }    }
497    else
498      {
499      if (callout_extra) fprintf(outfile, "    ");
500        else fprintf(outfile, "%3d ", cb->callout_number);
501      }
502    
503    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
504    fprintf(outfile, "^");
505    
506    if (post_start > 0)
507      {
508      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
509      fprintf(outfile, "^");
510      }
511    
512    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
513      fprintf(outfile, " ");
514    
515    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
516      pbuffer + cb->pattern_position);
517    
518    fprintf(outfile, "\n");
519    first_callout = 0;
520    
521    if (cb->callout_data != NULL)
522      {
523      int callout_data = *((int *)(cb->callout_data));
524      if (callout_data != 0)
525        {
526        fprintf(outfile, "Callout data = %d\n", callout_data);
527        return callout_data;
528        }
529      }
530    
531    return (cb->callout_number != callout_fail_id)? 0 :
532           (++callout_count >= callout_fail_count)? 1 : 0;
533  }  }
534    
535    
536    /*************************************************
537    *            Local malloc functions              *
538    *************************************************/
539    
540  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
541  compiled re. */  compiled re. */
542    
543  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
544  {  {
545    void *block = malloc(size);
546  gotten_store = size;  gotten_store = size;
547  if (log_store)  if (show_malloc)
548    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
549      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
550  return malloc(size);  }
551    
552    static void new_free(void *block)   /* free() wrapper that can log the pointer being released */
553    {
554    if (show_malloc)   /* Logging happens only when the show_malloc flag is non-zero */
555      fprintf(outfile, "free             %p\n", block);
556    free(block);   /* The block is always released, whether or not it was logged */
557  }  }
558    
559    
560    /* For recursion malloc/free, to test stacking calls */
561    
562    static void *stack_malloc(size_t size)   /* malloc() wrapper that can log the size and resulting pointer */
563    {
564    void *block = malloc(size);   /* Allocate first so the address is available for the log line */
565    if (show_malloc)   /* Logging happens only when the show_malloc flag is non-zero */
566      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
567    return block;   /* malloc()'s result is passed back unchanged, NULL included */
568    }
569    
570    static void stack_free(void *block)   /* free() wrapper that can log the pointer being released */
571    {
572    if (show_malloc)   /* Logging happens only when the show_malloc flag is non-zero */
573      fprintf(outfile, "stack_free       %p\n", block);
574    free(block);   /* The block is always released, whether or not it was logged */
575    }
576    
577    
578    /*************************************************
579    *          Call pcre_fullinfo()                  *
580    *************************************************/
581    
582  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
583    
584  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
# Line 420  if ((rc = pcre_fullinfo(re, study, optio Line 590  if ((rc = pcre_fullinfo(re, study, optio
590    
591    
592    
593    /*************************************************
594    *         Byte flipping function                 *
595    *************************************************/
596    
597    static unsigned long int   /* Returns value with its low 2 bytes (n == 2) or low 4 bytes (any other n) in reverse order */
598    byteflip(unsigned long int value, int n)
599    {
600    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);   /* Swap the two low bytes; higher bits are dropped */
601    return ((value & 0x000000ff) << 24) |   /* byte 0 -> byte 3 */
602           ((value & 0x0000ff00) <<  8) |   /* byte 1 -> byte 2 */
603           ((value & 0x00ff0000) >>  8) |   /* byte 2 -> byte 1 */
604           ((value & 0xff000000) >> 24);   /* byte 3 -> byte 0 */
605    }
606    
607    
608    
609    
610    /*************************************************
611    *        Check match or recursion limit          *
612    *************************************************/
613    
614    static int   /* Repeatedly runs pcre_exec() with different *limit values to find the smallest one that does not yield errnumber */
615    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
616      int start_offset, int options, int *use_offsets, int use_size_offsets,
617      int flag, unsigned long int *limit, int errnumber, const char *msg)
618    {
619    int count;   /* Result of the most recent pcre_exec() call; also the return value */
620    int min = 0;   /* Largest limit tried so far that produced errnumber */
621    int mid = 64;   /* Limit value to try next */
622    int max = -1;   /* Smallest limit tried so far that did not produce errnumber; -1 until one is found */
623    
624    extra->flags |= flag;   /* Set the caller-supplied flag bit(s) in extra for the duration of the search */
625    
626    for (;;)
627      {
628      *limit = mid;   /* Store the trial value through the caller's pointer before matching */
629    
630      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
631        use_offsets, use_size_offsets);
632    
633      if (count == errnumber)   /* This trial value gave errnumber: raise the lower bound */
634        {
635        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
636        min = mid;
637        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;   /* Step to max if adjacent, else bisect, else double while no upper bound exists */
638        }
639    
640      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
641                             count == PCRE_ERROR_PARTIAL)   /* Match, no match, or partial match: this trial value did not give errnumber */
642        {
643        if (mid == min + 1)   /* The value just below gave errnumber, so mid is the minimum */
644          {
645          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
646          break;
647          }
648        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
649        max = mid;   /* Lower the upper bound and bisect downwards */
650        mid = (min + mid)/2;
651        }
652      else break;    /* Some other error */
653      }
654    
655    extra->flags &= ~flag;   /* Clear the flag bit(s) again before returning */
656    return count;
657    }
658    
659    
660    
661    /*************************************************
662    *         Check newline indicator                *
663    *************************************************/
664    
665    /* This is used both at compile and run-time to check for <xxx> escapes, where
666    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
667    no match.
668    
669    Arguments:
670      p           points after the leading '<'
671      f           file for error message
672    
673    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
674    */
675    
676    static int
677    check_newline(uschar *p, FILE *f)
678    {
679    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
680    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
681    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
682    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
683    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
684    fprintf(f, "Unknown newline type at: <%s\n", p);
685    return 0;
686    }
687    
688    
689    
690    /*************************************************
691    *             Usage function                     *
692    *************************************************/
693    
/* Write the command-line summary to stdout. The -dfa and -p lines are shown
only when the corresponding features are compiled in (NODFA and NOPOSIX not
defined). */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
719    
720    
721    
722    /*************************************************
723    *                Main Program                    *
724    *************************************************/
725    
726  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
727  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 432  int options = 0; Line 734  int options = 0;
734  int study_options = 0;  int study_options = 0;
735  int op = 1;  int op = 1;
736  int timeit = 0;  int timeit = 0;
737    int timeitm = 0;
738  int showinfo = 0;  int showinfo = 0;
739  int showstore = 0;  int showstore = 0;
740    int quiet = 0;
741  int size_offsets = 45;  int size_offsets = 45;
742  int size_offsets_max;  int size_offsets_max;
743  int *offsets;  int *offsets = NULL;
744  #if !defined NOPOSIX  #if !defined NOPOSIX
745  int posix = 0;  int posix = 0;
746  #endif  #endif
747  int debug = 0;  int debug = 0;
748  int done = 0;  int done = 0;
749  unsigned char buffer[30000];  int all_use_dfa = 0;
750  unsigned char dbuffer[1024];  int yield = 0;
751    int stack_size;
752    
753    /* These vectors store, end-to-end, a list of captured substring names. Assume
754    that 1024 is plenty long enough for the few names we'll be testing. */
755    
756    uschar copynames[1024];
757    uschar getnames[1024];
758    
759    uschar *copynamesptr;
760    uschar *getnamesptr;
761    
762    /* Get buffers from malloc() so that Electric Fence will check their misuse
763    when I am debugging. They grow automatically when very long lines are read. */
764    
765    buffer = (unsigned char *)malloc(buffer_size);
766    dbuffer = (unsigned char *)malloc(buffer_size);
767    pbuffer = (unsigned char *)malloc(buffer_size);
768    
769  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
770    
771  outfile = stdout;  outfile = stdout;
772    
773    /* The following  _setmode() stuff is some Windows magic that tells its runtime
774    library to translate CRLF into a single LF character. At least, that's what
775    I've been told: never having used Windows I take this all on trust. Originally
776    it set 0x8000, but then I was advised that _O_BINARY was better. */
777    
778    #if defined(_WIN32) || defined(WIN32)
779    _setmode( _fileno( stdout ), _O_BINARY );
780    #endif
781    
782  /* Scan options */  /* Scan options */
783    
784  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
785    {    {
786    char *endptr;    unsigned char *endptr;
787    
788    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
789      showstore = 1;      showstore = 1;
790    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
791      else if (strcmp(argv[op], "-b") == 0) debug = 1;
792    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
793    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
794    #if !defined NODFA
795      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
796    #endif
797    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
798        ((size_offsets = (int)strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
799            *endptr == 0))
800      {      {
801      op++;      op++;
802      argc--;      argc--;
803      }      }
804      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
805        {
806        int both = argv[op][2] == 0;
807        int temp;
808        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
809                         *endptr == 0))
810          {
811          timeitm = temp;
812          op++;
813          argc--;
814          }
815        else timeitm = LOOPREPEAT;
816        if (both) timeit = timeitm;
817        }
818      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
819          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
820            *endptr == 0))
821        {
822    #if defined(_WIN32) || defined(WIN32)
823        printf("PCRE: -S not supported on this OS\n");
824        exit(1);
825    #else
826        int rc;
827        struct rlimit rlim;
828        getrlimit(RLIMIT_STACK, &rlim);
829        rlim.rlim_cur = stack_size * 1024 * 1024;
830        rc = setrlimit(RLIMIT_STACK, &rlim);
831        if (rc != 0)
832          {
833        printf("PCRE: setrlimit() failed with error %d\n", rc);
834        exit(1);
835          }
836        op++;
837        argc--;
838    #endif
839        }
840  #if !defined NOPOSIX  #if !defined NOPOSIX
841    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
842  #endif  #endif
843      else if (strcmp(argv[op], "-C") == 0)
844        {
845        int rc;
846        printf("PCRE version %s\n", pcre_version());
847        printf("Compiled with\n");
848        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
849        printf("  %sUTF-8 support\n", rc? "" : "No ");
850        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
851        printf("  %sUnicode properties support\n", rc? "" : "No ");
852        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
853        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
854          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
855          (rc == -2)? "ANYCRLF" :
856          (rc == -1)? "ANY" : "???");
857        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
858        printf("  Internal link size = %d\n", rc);
859        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
860        printf("  POSIX malloc threshold = %d\n", rc);
861        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
862        printf("  Default match limit = %d\n", rc);
863        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
864        printf("  Default recursion depth limit = %d\n", rc);
865        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
866        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
867        goto EXIT;
868        }
869      else if (strcmp(argv[op], "-help") == 0 ||
870               strcmp(argv[op], "--help") == 0)
871        {
872        usage();
873        goto EXIT;
874        }
875    else    else
876      {      {
877      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
878      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
879      printf("  -d     debug: show compiled code; implies -i\n"      yield = 1;
880             "  -i     show information about compiled pattern\n"      goto EXIT;
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
881      }      }
882    op++;    op++;
883    argc--;    argc--;
# Line 490  while (argc > 1 && argv[op][0] == '-') Line 886  while (argc > 1 && argv[op][0] == '-')
886  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
887    
888  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
889  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
890  if (offsets == NULL)  if (offsets == NULL)
891    {    {
892    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
893      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
894    return 1;    yield = 1;
895      goto EXIT;
896    }    }
897    
898  /* Sort out the input and output files */  /* Sort out the input and output files */
899    
900  if (argc > 1)  if (argc > 1)
901    {    {
902    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
903    if (infile == NULL)    if (infile == NULL)
904      {      {
905      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
906      return 1;      yield = 1;
907        goto EXIT;
908      }      }
909    }    }
910    
911  if (argc > 2)  if (argc > 2)
912    {    {
913    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
914    if (outfile == NULL)    if (outfile == NULL)
915      {      {
916      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
917      return 1;      yield = 1;
918        goto EXIT;
919      }      }
920    }    }
921    
922  /* Set alternative malloc function */  /* Set alternative malloc function */
923    
924  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
925    pcre_free = new_free;
926    pcre_stack_malloc = stack_malloc;
927    pcre_stack_free = stack_free;
928    
929  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
930    
931  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
932    
933  /* Main loop */  /* Main loop */
934    
# Line 542  while (!done) Line 944  while (!done)
944    
945    const char *error;    const char *error;
946    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
947      unsigned char *to_file = NULL;
948    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
949      unsigned long int true_size, true_study_size = 0;
950      size_t size, regex_gotten_store;
951    int do_study = 0;    int do_study = 0;
952    int do_debug = debug;    int do_debug = debug;
953      int debug_lengths = 1;
954    int do_G = 0;    int do_G = 0;
955    int do_g = 0;    int do_g = 0;
956    int do_showinfo = showinfo;    int do_showinfo = showinfo;
957    int do_showrest = 0;    int do_showrest = 0;
958    int utf8 = 0;    int do_flip = 0;
959    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
960    
961      use_utf8 = 0;
962    
963    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
964    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
965    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
966      fflush(outfile);
967    
968    p = buffer;    p = buffer;
969    while (isspace(*p)) p++;    while (isspace(*p)) p++;
970    if (*p == 0) continue;    if (*p == 0) continue;
971    
972    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
973    complete, read more. */  
974      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
975        {
976        unsigned long int magic, get_options;
977        uschar sbuf[8];
978        FILE *f;
979    
980        p++;
981        pp = p + (int)strlen((char *)p);
982        while (isspace(pp[-1])) pp--;
983        *pp = 0;
984    
985        f = fopen((char *)p, "rb");
986        if (f == NULL)
987          {
988          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
989          continue;
990          }
991    
992        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
993    
994        true_size =
995          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
996        true_study_size =
997          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
998    
999        re = (real_pcre *)new_malloc(true_size);
1000        regex_gotten_store = gotten_store;
1001    
1002        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1003    
1004        magic = ((real_pcre *)re)->magic_number;
1005        if (magic != MAGIC_NUMBER)
1006          {
1007          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1008            {
1009            do_flip = 1;
1010            }
1011          else
1012            {
1013            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1014            fclose(f);
1015            continue;
1016            }
1017          }
1018    
1019        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1020          do_flip? " (byte-inverted)" : "", p);
1021    
1022        /* Need to know if UTF-8 for printing data strings */
1023    
1024        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1025        use_utf8 = (get_options & PCRE_UTF8) != 0;
1026    
1027        /* Now see if there is any following study data */
1028    
1029        if (true_study_size != 0)
1030          {
1031          pcre_study_data *psd;
1032    
1033          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1034          extra->flags = PCRE_EXTRA_STUDY_DATA;
1035    
1036          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1037          extra->study_data = psd;
1038    
1039          if (fread(psd, 1, true_study_size, f) != true_study_size)
1040            {
1041            FAIL_READ:
1042            fprintf(outfile, "Failed to read data from %s\n", p);
1043            if (extra != NULL) new_free(extra);
1044            if (re != NULL) new_free(re);
1045            fclose(f);
1046            continue;
1047            }
1048          fprintf(outfile, "Study data loaded from %s\n", p);
1049          do_study = 1;     /* To get the data output if requested */
1050          }
1051        else fprintf(outfile, "No study data\n");
1052    
1053        fclose(f);
1054        goto SHOW_INFO;
1055        }
1056    
1057      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1058      the pattern; if it isn't complete, read more. */
1059    
1060    delimiter = *p++;    delimiter = *p++;
1061    
# Line 572  while (!done) Line 1066  while (!done)
1066      }      }
1067    
1068    pp = p;    pp = p;
1069      poffset = p - buffer;
1070    
1071    for(;;)    for(;;)
1072      {      {
# Line 582  while (!done) Line 1077  while (!done)
1077        pp++;        pp++;
1078        }        }
1079      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1080      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1081      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1082        {        {
1083        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1084        done = 1;        done = 1;
# Line 600  while (!done) Line 1087  while (!done)
1087      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1088      }      }
1089    
1090      /* The buffer may have moved while being extended; reset the start of data
1091      pointer to the correct relative point in the buffer. */
1092    
1093      p = buffer + poffset;
1094    
1095    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1096    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1097    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1098    
1099    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1100    
1101    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1102      for callouts. */
1103    
1104    *pp++ = 0;    *pp++ = 0;
1105      strcpy((char *)pbuffer, (char *)p);
1106    
1107    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1108    
# Line 620  while (!done) Line 1114  while (!done)
1114      {      {
1115      switch (*pp++)      switch (*pp++)
1116        {        {
1117          case 'f': options |= PCRE_FIRSTLINE; break;
1118        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1119        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1120        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 628  while (!done) Line 1123  while (!done)
1123    
1124        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1125        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1126          case 'B': do_debug = 1; break;
1127          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1128        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1129        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1130          case 'F': do_flip = 1; break;
1131        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1132        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1133          case 'J': options |= PCRE_DUPNAMES; break;
1134        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1135          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1136    
1137  #if !defined NOPOSIX  #if !defined NOPOSIX
1138        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 641  while (!done) Line 1141  while (!done)
1141        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1142        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1143        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1144        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case 'Z': debug_lengths = 0; break;
1145          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1146          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1147    
1148        case 'L':        case 'L':
1149        ppp = pp;        ppp = pp;
1150        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1151          /* The '0' test is just in case this is an unterminated line. */
1152          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1153        *ppp = 0;        *ppp = 0;
1154        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1155          {          {
1156          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1157          goto SKIP_DATA;          goto SKIP_DATA;
1158          }          }
1159          locale_set = 1;
1160        tables = pcre_maketables();        tables = pcre_maketables();
1161        pp = ppp;        pp = ppp;
1162        break;        break;
1163    
1164        case '\n': case ' ': break;        case '>':
1165          to_file = pp;
1166          while (*pp != 0) pp++;
1167          while (isspace(pp[-1])) pp--;
1168          *pp = 0;
1169          break;
1170    
1171          case '<':
1172            {
1173            int x = check_newline(pp, outfile);
1174            if (x == 0) goto SKIP_DATA;
1175            options |= x;
1176            while (*pp++ != '>');
1177            }
1178          break;
1179    
1180          case '\r':                      /* So that it works in Windows */
1181          case '\n':
1182          case ' ':
1183          break;
1184    
1185        default:        default:
1186        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1187        goto SKIP_DATA;        goto SKIP_DATA;
# Line 672  while (!done) Line 1197  while (!done)
1197      {      {
1198      int rc;      int rc;
1199      int cflags = 0;      int cflags = 0;
1200    
1201      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1202      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1203        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1204        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1205        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1206    
1207      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1208    
1209      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 681  while (!done) Line 1211  while (!done)
1211    
1212      if (rc != 0)      if (rc != 0)
1213        {        {
1214        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1215        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1216        goto SKIP_DATA;        goto SKIP_DATA;
1217        }        }
# Line 693  while (!done) Line 1223  while (!done)
1223  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1224    
1225      {      {
1226      if (timeit)      if (timeit > 0)
1227        {        {
1228        register int i;        register int i;
1229        clock_t time_taken;        clock_t time_taken;
1230        clock_t start_time = clock();        clock_t start_time = clock();
1231        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1232          {          {
1233          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1234          if (re != NULL) free(re);          if (re != NULL) free(re);
1235          }          }
1236        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1237        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1238          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1239          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1240        }        }
1241    
1242      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 722  while (!done) Line 1252  while (!done)
1252          {          {
1253          for (;;)          for (;;)
1254            {            {
1255            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1256              {              {
1257              done = 1;              done = 1;
1258              goto CONTINUE;              goto CONTINUE;
# Line 740  while (!done) Line 1270  while (!done)
1270      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
1271      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
1272    
1273        if (log_store)
1274          fprintf(outfile, "Memory allocation (code space): %d\n",
1275            (int)(gotten_store -
1276                  sizeof(real_pcre) -
1277                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1278    
1279        /* Extract the size for possible writing before possibly flipping it,
1280        and remember the store that was got. */
1281    
1282        true_size = ((real_pcre *)re)->size;
1283        regex_gotten_store = gotten_store;
1284    
1285        /* If /S was present, study the regexp to generate additional info to
1286        help with the matching. */
1287    
1288        if (do_study)
1289          {
1290          if (timeit > 0)
1291            {
1292            register int i;
1293            clock_t time_taken;
1294            clock_t start_time = clock();
1295            for (i = 0; i < timeit; i++)
1296              extra = pcre_study(re, study_options, &error);
1297            time_taken = clock() - start_time;
1298            if (extra != NULL) free(extra);
1299            fprintf(outfile, "  Study time %.4f milliseconds\n",
1300              (((double)time_taken * 1000.0) / (double)timeit) /
1301                (double)CLOCKS_PER_SEC);
1302            }
1303          extra = pcre_study(re, study_options, &error);
1304          if (error != NULL)
1305            fprintf(outfile, "Failed to study: %s\n", error);
1306          else if (extra != NULL)
1307            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1308          }
1309    
1310        /* If the 'F' option was present, we flip the bytes of all the integer
1311        fields in the regex data block and the study block. This is to make it
1312        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1313        compiled on a different architecture. */
1314    
1315        if (do_flip)
1316          {
1317          real_pcre *rre = (real_pcre *)re;
1318          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1319          rre->size = byteflip(rre->size, sizeof(rre->size));
1320          rre->options = byteflip(rre->options, sizeof(rre->options));
1321          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1322          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1323          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1324          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1325          rre->name_table_offset = byteflip(rre->name_table_offset,
1326            sizeof(rre->name_table_offset));
1327          rre->name_entry_size = byteflip(rre->name_entry_size,
1328            sizeof(rre->name_entry_size));
1329          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1330    
1331          if (extra != NULL)
1332            {
1333            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1334            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1335            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1336            }
1337          }
1338    
1339        /* Extract information from the compiled data if required */
1340    
1341        SHOW_INFO:
1342    
1343        if (do_debug)
1344          {
1345          fprintf(outfile, "------------------------------------------------------------------\n");
1346          pcre_printint(re, outfile, debug_lengths);
1347          }
1348    
1349      if (do_showinfo)      if (do_showinfo)
1350        {        {
1351        unsigned long int get_options;        unsigned long int get_options, all_options;
1352    #if !defined NOINFOCHECK
1353        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1354    #endif
1355        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1356        size_t size;        int nameentrysize, namecount;
1357          const uschar *nametable;
       if (do_debug) print_internals(re);  
1358    
1359        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1360        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1361        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1362        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1363        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1364        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1365          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1366          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1367          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1368    
1369    #if !defined NOINFOCHECK
1370        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1371        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1372          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 773  while (!done) Line 1384  while (!done)
1384            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1385              get_options, old_options);              get_options, old_options);
1386          }          }
1387    #endif
1388    
1389        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1390          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1391          size, gotten_store);          (int)size, (int)regex_gotten_store);
1392    
1393        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1394        if (backrefmax > 0)        if (backrefmax > 0)
1395          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
1396    
1397          if (namecount > 0)
1398            {
1399            fprintf(outfile, "Named capturing subpatterns:\n");
1400            while (namecount-- > 0)
1401              {
1402              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1403                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1404                GET2(nametable, 0));
1405              nametable += nameentrysize;
1406              }
1407            }
1408    
1409          /* The NOPARTIAL bit is a private bit in the options, so we have
1410          to fish it out via our back door */
1411    
1412          all_options = ((real_pcre *)re)->options;
1413          if (do_flip)
1414            {
1415            all_options = byteflip(all_options, sizeof(all_options));
1416             }
1417    
1418          if ((all_options & PCRE_NOPARTIAL) != 0)
1419            fprintf(outfile, "Partial matching not supported\n");
1420    
1421        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1422          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1423            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1424            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1425            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1426            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1427              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1428            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1429            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1430            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1431            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1432            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1433              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1434              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1435              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1436    
1437          switch (get_options & PCRE_NEWLINE_BITS)
1438            {
1439            case PCRE_NEWLINE_CR:
1440            fprintf(outfile, "Forced newline sequence: CR\n");
1441            break;
1442    
1443            case PCRE_NEWLINE_LF:
1444            fprintf(outfile, "Forced newline sequence: LF\n");
1445            break;
1446    
1447            case PCRE_NEWLINE_CRLF:
1448            fprintf(outfile, "Forced newline sequence: CRLF\n");
1449            break;
1450    
1451            case PCRE_NEWLINE_ANYCRLF:
1452            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1453            break;
1454    
1455        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_ANY:
1456          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: ANY\n");
1457            break;
1458    
1459            default:
1460            break;
1461            }
1462    
1463        if (first_char == -1)        if (first_char == -1)
1464          {          {
1465          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1466          }          }
1467        else if (first_char < 0)        else if (first_char < 0)
1468          {          {
# Line 806  while (!done) Line 1470  while (!done)
1470          }          }
1471        else        else
1472          {          {
1473          if (isprint(first_char))          int ch = first_char & 255;
1474            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1475              "" : " (caseless)";
1476            if (PRINTHEX(ch))
1477              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1478          else          else
1479            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1480          }          }
1481    
1482        if (need_char < 0)        if (need_char < 0)
# Line 818  while (!done) Line 1485  while (!done)
1485          }          }
1486        else        else
1487          {          {
1488          if (isprint(need_char))          int ch = need_char & 255;
1489            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1490              "" : " (caseless)";
1491            if (PRINTHEX(ch))
1492              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1493          else          else
1494            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1495          }          }
       }  
1496    
1497      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1498      help with the matching. */        value, but it varies, depending on the computer architecture, and
1499          so messes up the test suite. (And with the /F option, it might be
1500          flipped.) */
1501    
1502      if (do_study)        if (do_study)
       {  
       if (timeit)  
1503          {          {
1504          register int i;          if (extra == NULL)
1505          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1506          clock_t start_time = clock();          else
1507          for (i = 0; i < LOOPREPEAT; i++)            {
1508            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1509          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1510          if (extra != NULL) free(extra);  
1511          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1512            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1513            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1514                {
1515                int i;
1516                int c = 24;
1517                fprintf(outfile, "Starting byte set: ");
1518                for (i = 0; i < 256; i++)
1519                  {
1520                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1521                    {
1522                    if (c > 75)
1523                      {
1524                      fprintf(outfile, "\n  ");
1525                      c = 2;
1526                      }
1527                    if (PRINTHEX(i) && i != ' ')
1528                      {
1529                      fprintf(outfile, "%c ", i);
1530                      c += 2;
1531                      }
1532                    else
1533                      {
1534                      fprintf(outfile, "\\x%02x ", i);
1535                      c += 5;
1536                      }
1537                    }
1538                  }
1539                fprintf(outfile, "\n");
1540                }
1541              }
1542          }          }
1543          }
1544    
1545        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1546        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1547          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1548    
1549        else if (do_showinfo)      if (to_file != NULL)
1550          {
1551          FILE *f = fopen((char *)to_file, "wb");
1552          if (f == NULL)
1553            {
1554            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1555            }
1556          else
1557          {          {
1558          uschar *start_bits = NULL;          uschar sbuf[8];
1559          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (true_size >> 24)  & 255;
1560          if (start_bits == NULL)          sbuf[1] = (true_size >> 16)  & 255;
1561            fprintf(outfile, "No starting character set\n");          sbuf[2] = (true_size >>  8)  & 255;
1562            sbuf[3] = (true_size)  & 255;
1563    
1564            sbuf[4] = (true_study_size >> 24)  & 255;
1565            sbuf[5] = (true_study_size >> 16)  & 255;
1566            sbuf[6] = (true_study_size >>  8)  & 255;
1567            sbuf[7] = (true_study_size)  & 255;
1568    
1569            if (fwrite(sbuf, 1, 8, f) < 8 ||
1570                fwrite(re, 1, true_size, f) < true_size)
1571              {
1572              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1573              }
1574          else          else
1575            {            {
1576            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1577            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1578              {              {
1579              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1580                    true_study_size)
1581                {                {
1582                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1583                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1584                }                }
1585                else fprintf(outfile, "Study data written to %s\n", to_file);
1586    
1587              }              }
           fprintf(outfile, "\n");  
1588            }            }
1589            fclose(f);
1590          }          }
1591    
1592          new_free(re);
1593          if (extra != NULL) new_free(extra);
1594          if (tables != NULL) new_free((void *)tables);
1595          continue;  /* With next regex */
1596        }        }
1597      }      }        /* End of non-POSIX compile */
1598    
1599    /* Read data lines and test them */    /* Read data lines and test them */
1600    
1601    for (;;)    for (;;)
1602      {      {
1603      unsigned char *q;      uschar *q;
1604      unsigned char *bptr = dbuffer;      uschar *bptr;
1605      int *use_offsets = offsets;      int *use_offsets = offsets;
1606      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1607        int callout_data = 0;
1608        int callout_data_set = 0;
1609      int count, c;      int count, c;
1610      int copystrings = 0;      int copystrings = 0;
1611        int find_match_limit = 0;
1612      int getstrings = 0;      int getstrings = 0;
1613      int getlist = 0;      int getlist = 0;
1614      int gmatched = 0;      int gmatched = 0;
1615      int start_offset = 0;      int start_offset = 0;
1616      int g_notempty = 0;      int g_notempty = 0;
1617        int use_dfa = 0;
1618    
1619      options = 0;      options = 0;
1620    
1621      if (infile == stdin) printf("data> ");      *copynames = 0;
1622      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1623    
1624        copynamesptr = copynames;
1625        getnamesptr = getnames;
1626    
1627        pcre_callout = callout;
1628        first_callout = 1;
1629        callout_extra = 0;
1630        callout_count = 0;
1631        callout_fail_count = 999999;
1632        callout_fail_id = -1;
1633        show_malloc = 0;
1634    
1635        if (extra != NULL) extra->flags &=
1636          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1637    
1638        len = 0;
1639        for (;;)
1640        {        {
1641        done = 1;        if (infile == stdin) printf("data> ");
1642        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1643            {
1644            if (len > 0) break;
1645            done = 1;
1646            goto CONTINUE;
1647            }
1648          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1649          len = (int)strlen((char *)buffer);
1650          if (buffer[len-1] == '\n') break;
1651        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1652    
     len = (int)strlen((char *)buffer);  
1653      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1654      buffer[len] = 0;      buffer[len] = 0;
1655      if (len == 0) break;      if (len == 0) break;
# Line 922  while (!done) Line 1657  while (!done)
1657      p = buffer;      p = buffer;
1658      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1659    
1660      q = dbuffer;      bptr = q = dbuffer;
1661      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1662        {        {
1663        int i = 0;        int i = 0;
1664        int n = 0;        int n = 0;
1665    
1666        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1667          {          {
1668          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 943  while (!done) Line 1679  while (!done)
1679          c -= '0';          c -= '0';
1680          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1681            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1682    
1683    #if !defined NOUTF8
1684            if (use_utf8 && c > 255)
1685              {
1686              unsigned char buff8[8];
1687              int ii, utn;
1688              utn = ord2utf8(c, buff8);
1689              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1690              c = buff8[ii];   /* Last byte */
1691              }
1692    #endif
1693          break;          break;
1694    
1695          case 'x':          case 'x':
1696    
1697          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1698    
1699    #if !defined NOUTF8
1700          if (*p == '{')          if (*p == '{')
1701            {            {
1702            unsigned char *pt = p;            unsigned char *pt = p;
# Line 957  while (!done) Line 1705  while (!done)
1705              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1706            if (*pt == '}')            if (*pt == '}')
1707              {              {
1708              unsigned char buffer[8];              unsigned char buff8[8];
1709              int ii, utn;              int ii, utn;
1710              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
1711              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1712              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1713              p = pt + 1;              p = pt + 1;
1714              break;              break;
1715              }              }
1716            /* Not correct form; fall through */            /* Not correct form; fall through */
1717            }            }
1718    #endif
1719    
1720          /* Ordinary \x */          /* Ordinary \x */
1721    
# Line 978  while (!done) Line 1727  while (!done)
1727            }            }
1728          break;          break;
1729    
1730          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1731          p--;          p--;
1732          continue;          continue;
1733    
1734            case '>':
1735            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1736            continue;
1737    
1738          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1739          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1740          continue;          continue;
# Line 991  while (!done) Line 1744  while (!done)
1744          continue;          continue;
1745    
1746          case 'C':          case 'C':
1747          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1748          copystrings |= 1 << n;            {
1749              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1750              copystrings |= 1 << n;
1751              }
1752            else if (isalnum(*p))
1753              {
1754              uschar *npp = copynamesptr;
1755              while (isalnum(*p)) *npp++ = *p++;
1756              *npp++ = 0;
1757              *npp = 0;
1758              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1759              if (n < 0)
1760                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1761              copynamesptr = npp;
1762              }
1763            else if (*p == '+')
1764              {
1765              callout_extra = 1;
1766              p++;
1767              }
1768            else if (*p == '-')
1769              {
1770              pcre_callout = NULL;
1771              p++;
1772              }
1773            else if (*p == '!')
1774              {
1775              callout_fail_id = 0;
1776              p++;
1777              while(isdigit(*p))
1778                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1779              callout_fail_count = 0;
1780              if (*p == '!')
1781                {
1782                p++;
1783                while(isdigit(*p))
1784                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1785                }
1786              }
1787            else if (*p == '*')
1788              {
1789              int sign = 1;
1790              callout_data = 0;
1791              if (*(++p) == '-') { sign = -1; p++; }
1792              while(isdigit(*p))
1793                callout_data = callout_data * 10 + *p++ - '0';
1794              callout_data *= sign;
1795              callout_data_set = 1;
1796              }
1797            continue;
1798    
1799    #if !defined NODFA
1800            case 'D':
1801    #if !defined NOPOSIX
1802            if (posix || do_posix)
1803              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1804            else
1805    #endif
1806              use_dfa = 1;
1807            continue;
1808    
1809            case 'F':
1810            options |= PCRE_DFA_SHORTEST;
1811          continue;          continue;
1812    #endif
1813    
1814          case 'G':          case 'G':
1815          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1816          getstrings |= 1 << n;            {
1817              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1818              getstrings |= 1 << n;
1819              }
1820            else if (isalnum(*p))
1821              {
1822              uschar *npp = getnamesptr;
1823              while (isalnum(*p)) *npp++ = *p++;
1824              *npp++ = 0;
1825              *npp = 0;
1826              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1827              if (n < 0)
1828                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1829              getnamesptr = npp;
1830              }
1831          continue;          continue;
1832    
1833          case 'L':          case 'L':
1834          getlist = 1;          getlist = 1;
1835          continue;          continue;
1836    
1837            case 'M':
1838            find_match_limit = 1;
1839            continue;
1840    
1841          case 'N':          case 'N':
1842          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1843          continue;          continue;
# Line 1014  while (!done) Line 1848  while (!done)
1848            {            {
1849            size_offsets_max = n;            size_offsets_max = n;
1850            free(offsets);            free(offsets);
1851            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1852            if (offsets == NULL)            if (offsets == NULL)
1853              {              {
1854              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1855                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1856              return 1;              yield = 1;
1857                goto EXIT;
1858              }              }
1859            }            }
1860          use_size_offsets = n;          use_size_offsets = n;
1861          if (n == 0) use_offsets = NULL;          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1862            continue;
1863    
1864            case 'P':
1865            options |= PCRE_PARTIAL;
1866            continue;
1867    
1868            case 'Q':
1869            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1870            if (extra == NULL)
1871              {
1872              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1873              extra->flags = 0;
1874              }
1875            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1876            extra->match_limit_recursion = n;
1877            continue;
1878    
1879            case 'q':
1880            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1881            if (extra == NULL)
1882              {
1883              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1884              extra->flags = 0;
1885              }
1886            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1887            extra->match_limit = n;
1888            continue;
1889    
1890    #if !defined NODFA
1891            case 'R':
1892            options |= PCRE_DFA_RESTART;
1893            continue;
1894    #endif
1895    
1896            case 'S':
1897            show_malloc = 1;
1898          continue;          continue;
1899    
1900          case 'Z':          case 'Z':
1901          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1902          continue;          continue;
1903    
1904            case '?':
1905            options |= PCRE_NO_UTF8_CHECK;
1906            continue;
1907    
1908            case '<':
1909              {
1910              int x = check_newline(p, outfile);
1911              if (x == 0) goto NEXT_DATA;
1912              options |= x;
1913              while (*p++ != '>');
1914              }
1915            continue;
1916          }          }
1917        *q++ = c;        *q++ = c;
1918        }        }
1919      *q = 0;      *q = 0;
1920      len = q - dbuffer;      len = q - dbuffer;
1921    
1922        if ((all_use_dfa || use_dfa) && find_match_limit)
1923          {
1924          printf("**Match limit not relevant for DFA matching: ignored\n");
1925          find_match_limit = 0;
1926          }
1927    
1928      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1929      support timing. */      support timing or playing with the match limit or callout data. */
1930    
1931  #if !defined NOPOSIX  #if !defined NOPOSIX
1932      if (posix || do_posix)      if (posix || do_posix)
1933        {        {
1934        int rc;        int rc;
1935        int eflags = 0;        int eflags = 0;
1936        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1937          if (use_size_offsets > 0)
1938            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1939        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1940        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1941    
# Line 1051  while (!done) Line 1943  while (!done)
1943    
1944        if (rc != 0)        if (rc != 0)
1945          {          {
1946          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1947          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1948          }          }
1949          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1950                  != 0)
1951            {
1952            fprintf(outfile, "Matched with REG_NOSUB\n");
1953            }
1954        else        else
1955          {          {
1956          size_t i;          size_t i;
1957          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1958            {            {
1959            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1960              {              {
1961              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1962              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1963                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1964              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1965              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1966                {                {
1967                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1968                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1969                    outfile);
1970                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1971                }                }
1972              }              }
# Line 1084  while (!done) Line 1982  while (!done)
1982    
1983      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1984        {        {
1985        if (timeit)        if (timeitm > 0)
1986          {          {
1987          register int i;          register int i;
1988          clock_t time_taken;          clock_t time_taken;
1989          clock_t start_time = clock();          clock_t start_time = clock();
1990          for (i = 0; i < LOOPREPEAT; i++)  
1991    #if !defined NODFA
1992            if (all_use_dfa || use_dfa)
1993              {
1994              int workspace[1000];
1995              for (i = 0; i < timeitm; i++)
1996                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1997                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1998                  sizeof(workspace)/sizeof(int));
1999              }
2000            else
2001    #endif
2002    
2003            for (i = 0; i < timeitm; i++)
2004            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2005              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2006    
2007          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2008          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2009            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2010            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2011            }
2012    
2013          /* If find_match_limit is set, we want to do repeated matches with
2014          varying limits in order to find the minimum value for the match limit and
2015          for the recursion limit. */
2016    
2017          if (find_match_limit)
2018            {
2019            if (extra == NULL)
2020              {
2021              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2022              extra->flags = 0;
2023              }
2024    
2025            (void)check_match_limit(re, extra, bptr, len, start_offset,
2026              options|g_notempty, use_offsets, use_size_offsets,
2027              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2028              PCRE_ERROR_MATCHLIMIT, "match()");
2029    
2030            count = check_match_limit(re, extra, bptr, len, start_offset,
2031              options|g_notempty, use_offsets, use_size_offsets,
2032              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2033              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2034          }          }
2035    
2036        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
2037          start_offset, options | g_notempty, use_offsets, use_size_offsets);  
2038          else if (callout_data_set)
2039            {
2040            if (extra == NULL)
2041              {
2042              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2043              extra->flags = 0;
2044              }
2045            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2046            extra->callout_data = &callout_data;
2047            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2048              options | g_notempty, use_offsets, use_size_offsets);
2049            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2050            }
2051    
2052          /* The normal case is just to do the match once, with the default
2053          value of match_limit. */
2054    
2055    #if !defined NODFA
2056          else if (all_use_dfa || use_dfa)
2057            {
2058            int workspace[1000];
2059            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2060              options | g_notempty, use_offsets, use_size_offsets, workspace,
2061              sizeof(workspace)/sizeof(int));
2062            if (count == 0)
2063              {
2064              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2065              count = use_size_offsets/2;
2066              }
2067            }
2068    #endif
2069    
2070        if (count == 0)        else
2071          {          {
2072          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2073          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2074            if (count == 0)
2075              {
2076              fprintf(outfile, "Matched, but too many substrings\n");
2077              count = use_size_offsets/3;
2078              }
2079          }          }
2080    
2081        /* Matched */        /* Matched */
2082    
2083        if (count >= 0)        if (count >= 0)
2084          {          {
2085          int i;          int i, maxcount;
2086    
2087    #if !defined NODFA
2088            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2089    #endif
2090              maxcount = use_size_offsets/3;
2091    
2092            /* This is a check against a lunatic return value. */
2093    
2094            if (count > maxcount)
2095              {
2096              fprintf(outfile,
2097                "** PCRE error: returned count %d is too big for offset size %d\n",
2098                count, use_size_offsets);
2099              count = use_size_offsets/3;
2100              if (do_g || do_G)
2101                {
2102                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2103                do_g = do_G = FALSE;        /* Break g/G loop */
2104                }
2105              }
2106    
2107          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2108            {            {
2109            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1119  while (!done) Line 2111  while (!done)
2111            else            else
2112              {              {
2113              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2114              pchars(bptr + use_offsets[i], use_offsets[i+1] - use_offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
2115                  use_offsets[i+1] - use_offsets[i], outfile);
2116              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2117              if (i == 0)              if (i == 0)
2118                {                {
2119                if (do_showrest)                if (do_showrest)
2120                  {                  {
2121                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
2122                  pchars(bptr + use_offsets[i+1], len - use_offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2123                      outfile);
2124                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
2125                  }                  }
2126                }                }
# Line 1137  while (!done) Line 2131  while (!done)
2131            {            {
2132            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2133              {              {
2134              char copybuffer[16];              char copybuffer[256];
2135              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2136                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2137              if (rc < 0)              if (rc < 0)
# Line 1147  while (!done) Line 2141  while (!done)
2141              }              }
2142            }            }
2143    
2144            for (copynamesptr = copynames;
2145                 *copynamesptr != 0;
2146                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2147              {
2148              char copybuffer[256];
2149              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2150                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2151              if (rc < 0)
2152                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2153              else
2154                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2155              }
2156    
2157          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2158            {            {
2159            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1159  while (!done) Line 2166  while (!done)
2166              else              else
2167                {                {
2168                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2169                pcre_free_substring(substring);                pcre_free_substring(substring);
2170                }                }
2171              }              }
2172            }            }
2173    
2174            for (getnamesptr = getnames;
2175                 *getnamesptr != 0;
2176                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2177              {
2178              const char *substring;
2179              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2180                count, (char *)getnamesptr, &substring);
2181              if (rc < 0)
2182                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2183              else
2184                {
2185                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2186                pcre_free_substring(substring);
2187                }
2188              }
2189    
2190          if (getlist)          if (getlist)
2191            {            {
2192            const char **stringlist;            const char **stringlist;
# Line 1184  while (!done) Line 2206  while (!done)
2206            }            }
2207          }          }
2208    
2209          /* There was a partial match */
2210    
2211          else if (count == PCRE_ERROR_PARTIAL)
2212            {
2213            fprintf(outfile, "Partial match");
2214    #if !defined NODFA
2215            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2216              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2217                bptr + use_offsets[0]);
2218    #endif
2219            fprintf(outfile, "\n");
2220            break;  /* Out of the /g loop */
2221            }
2222    
2223        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2224        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2225        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2226        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2227        was checked before setting g_notempty. */  
2228          Complication arises in the case when the newline option is "any" or
2229          "anycrlf". If the previous match was at the end of a line terminated by
2230          CRLF, an advance of one character just passes the \r, whereas we should
2231          prefer the longer newline sequence, as does the code in pcre_exec().
2232          Fudge the offset value to achieve this.
2233    
2234          Otherwise, in the case of UTF-8 matching, the advance must be one
2235          character, not one byte. */
2236    
2237        else        else
2238          {          {
2239          if (g_notempty != 0)          if (g_notempty != 0)
2240            {            {
2241              int onechar = 1;
2242              unsigned int obits = ((real_pcre *)re)->options;
2243            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2244            use_offsets[1] = start_offset + 1;            if ((obits & PCRE_NEWLINE_BITS) == 0)
2245                {
2246                int d;
2247                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2248                obits = (d == '\r')? PCRE_NEWLINE_CR :
2249                        (d == '\n')? PCRE_NEWLINE_LF :
2250                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2251                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2252                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2253                }
2254              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2255                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2256                  &&
2257                  start_offset < len - 1 &&
2258                  bptr[start_offset] == '\r' &&
2259                  bptr[start_offset+1] == '\n')
2260                onechar++;
2261              else if (use_utf8)
2262                {
2263                while (start_offset + onechar < len)
2264                  {
2265                  int tb = bptr[start_offset+onechar];
2266                  if (tb <= 127) break;
2267                  tb &= 0xc0;
2268                  if (tb != 0 && tb != 0xc0) onechar++;
2269                  }
2270                }
2271              use_offsets[1] = start_offset + onechar;
2272            }            }
2273          else          else
2274            {            {
2275            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2276              {              {
2277              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2278              }              }
2279              else fprintf(outfile, "Error %d\n", count);
2280            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2281            }            }
2282          }          }
# Line 1220  while (!done) Line 2293  while (!done)
2293        character. */        character. */
2294    
2295        g_notempty = 0;        g_notempty = 0;
2296    
2297        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2298          {          {
2299          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1238  while (!done) Line 2312  while (!done)
2312          len -= use_offsets[1];          len -= use_offsets[1];
2313          }          }
2314        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2315    
2316        NEXT_DATA: continue;
2317      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2318    
2319    CONTINUE:    CONTINUE:
# Line 1246  while (!done) Line 2322  while (!done)
2322    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2323  #endif  #endif
2324    
2325    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2326    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2327    if (tables != NULL)    if (tables != NULL)
2328      {      {
2329      free((void *)tables);      new_free((void *)tables);
2330      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2331        locale_set = 0;
2332      }      }
2333    }    }
2334    
2335  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2336  return 0;  
2337    EXIT:
2338    
2339    if (infile != NULL && infile != stdin) fclose(infile);
2340    if (outfile != NULL && outfile != stdout) fclose(outfile);
2341    
2342    free(buffer);
2343    free(dbuffer);
2344    free(pbuffer);
2345    free(offsets);
2346    
2347    return yield;
2348  }  }
2349    
2350  /* End */  /* End of pcretest.c */

Legend:
Removed from v.59  
changed lines
  Added in v.151

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12