/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 53 by nigel, Sat Feb 24 21:39:42 2007 UTC revision 79 by nigel, Sat Feb 24 21:40:52 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47  /* Use the internal info for displaying the results of pcre_study(). */  #define PCRE_SPY        /* For Win32 build, import data, not export */
48    
49    /* We need the internal info for displaying the results of pcre_study() and
50    other internal data; pcretest also uses some of the fixed tables, and generally
51    has "inside information" compared to a program that strictly follows the PCRE
52    API. */
53    
54    #include "pcre_internal.h"
55    
 #include "internal.h"  
56    
57  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
58  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 62  Makefile. */
62  #include "pcreposix.h"  #include "pcreposix.h"
63  #endif  #endif
64    
65    /* It is also possible, for the benefit of the version imported into Exim, to
66    build pcretest without support for UTF8 (define NOUTF8), without the interface
67    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
68    function (define NOINFOCHECK). */
69    
70    
71  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
72  #ifdef CLK_TCK  #ifdef CLK_TCK
73  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 76  Makefile. */
76  #endif  #endif
77  #endif  #endif
78    
79  #define LOOPREPEAT 20000  #define LOOPREPEAT 500000
80    
81    #define BUFFER_SIZE 30000
82    #define PBUFFER_SIZE BUFFER_SIZE
83    #define DBUFFER_SIZE BUFFER_SIZE
84    
85    
86  static FILE *outfile;  static FILE *outfile;
87  static int log_store = 0;  static int log_store = 0;
88    static int callout_count;
89    static int callout_extra;
90    static int callout_fail_count;
91    static int callout_fail_id;
92    static int first_callout;
93    static int show_malloc;
94    static int use_utf8;
95  static size_t gotten_store;  static size_t gotten_store;
96    
97    static uschar *pbuffer = NULL;
98    
99    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
   
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
   
100    
101  /*************************************************  /*************************************************
102  *       Convert character value to UTF-8         *  *          Read number from string               *
103  *************************************************/  *************************************************/
104    
105  /* This function takes an integer value in the range 0 - 0x7fffffff  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
106  and encodes it as a UTF-8 character in 0 to 6 bytes.  around with conditional compilation, just do the job by hand. It is only used
107    for unpicking the -o argument, so just keep it simple.
108    
109  Arguments:  Arguments:
110    cvalue     the character value    str           string to be converted
111    buffer     pointer to buffer for result - at least 6 bytes long    endptr        where to put the end pointer
112    
113  Returns:     number of characters placed in the buffer  Returns:        the value, as an int
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
114  */  */
115    
116  static int  static int
117  ord2utf8(int cvalue, unsigned char *buffer)  get_value(unsigned char *str, unsigned char **endptr)
118  {  {
119  register int i, j;  int result = 0;
120  for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  while(*str != 0 && isspace(*str)) str++;
121    if (cvalue <= utf8_table1[i]) break;  while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
122  if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  *endptr = str;
123  if (cvalue < 0) return -1;  return(result);
 *buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]);  
 cvalue >>= 6 - i;  
 for (j = 0; j < i; j++)  
   {  
   *buffer++ = 0x80 | (cvalue & 0x3f);  
   cvalue >>= 6;  
   }  
 return i + 1;  
124  }  }
125    
126    
127    
128    
129  /*************************************************  /*************************************************
130  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
131  *************************************************/  *************************************************/
# Line 99  Returns: > 0 => the number of bytes c Line 141  Returns: > 0 => the number of bytes c
141             -6 to 0 => malformed UTF-8 character at offset = (-return)             -6 to 0 => malformed UTF-8 character at offset = (-return)
142  */  */
143    
144  int  #if !defined NOUTF8
145    
146    static int
147  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *buffer, int *vptr)
148  {  {
149  int c = *buffer++;  int c = *buffer++;
# Line 117  if (i == 0 || i == 6) return 0; / Line 161  if (i == 0 || i == 6) return 0; /
161    
162  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
163    
164  d = c & utf8_table3[i];  s = 6*i;
165  s = 6 - i;  d = (c & _pcre_utf8_table3[i]) << s;
166    
167  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
168    {    {
169    c = *buffer++;    c = *buffer++;
170    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
171      s -= 6;
172    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
   s += 6;  
173    }    }
174    
175  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
176    
177  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < _pcre_utf8_table1_size; j++)
178    if (d <= utf8_table1[j]) break;    if (d <= _pcre_utf8_table1[j]) break;
179  if (j != i) return -(i+1);  if (j != i) return -(i+1);
180    
181  /* Valid value */  /* Valid value */
# Line 140  if (j != i) return -(i+1); Line 184  if (j != i) return -(i+1);
184  return i+1;  return i+1;
185  }  }
186    
187    #endif
188    
189    
190    
191    /*************************************************
192    *             Print character string             *
193    *************************************************/
194    
195    /* Character string printing function. Must handle UTF-8 strings in utf8
196    mode. Yields number of characters printed. If handed a NULL file, just counts
197    chars without printing. */
198    
199  /* Debugging function to print the internal form of the regex. This is the same  static int pchars(unsigned char *p, int length, FILE *f)
 code as contained in pcre.c under the DEBUG macro. */  
   
 static const char *OP_names[] = {  
   "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Branumber", "Bra"  
 };  
   
   
 static void print_internals(pcre *re)  
200  {  {
201  unsigned char *code = ((real_pcre *)re)->code;  int c;
202    int yield = 0;
 fprintf(outfile, "------------------------------------------------------------------\n");  
203    
204  for(;;)  while (length-- > 0)
205    {    {
206    int c;  #if !defined NOUTF8
207    int charlength;    if (use_utf8)
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
208      {      {
209      if (*code - OP_BRA > EXTRACT_BASIC_MAX)      int rc = utf82ord(p, &c);
       fprintf(outfile, "%3d Bra extra", (code[1] << 8) + code[2]);  
     else  
       fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
210    
211    else switch(*code)      if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
212      {        {
213      case OP_END:        length -= rc - 1;
214      fprintf(outfile, "    %s\n", OP_names[*code]);        p += rc;
215      fprintf(outfile, "------------------------------------------------------------------\n");        if (c < 256 && isprint(c))
216      return;          {
217            if (f != NULL) fprintf(f, "%c", c);
218      case OP_OPT:          yield++;
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     case OP_COND:  
     case OP_BRANUMBER:  
     case OP_REVERSE:  
     case OP_CREF:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", (code[1] << 8) | code[2]);  
     code += 3;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
219          }          }
220        fprintf(outfile, "]");        else
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
221          {          {
222          case OP_CRSTAR:          int n;
223          case OP_CRMINSTAR:          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
224          case OP_CRPLUS:          yield += n;
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
   
         default:  
         code--;  
225          }          }
226          continue;
227        }        }
228      break;      }
229    #endif
230    
231      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
232    
233      default:    if (isprint(c = *(p++)))
234      fprintf(outfile, "    %s", OP_names[*code]);      {
235      break;      if (f != NULL) fprintf(f, "%c", c);
236        yield++;
237        }
238      else
239        {
240        if (f != NULL) fprintf(f, "\\x%02x", c);
241        yield += 4;
242      }      }
   
   code++;  
   fprintf(outfile, "\n");  
243    }    }
244    
245    return yield;
246  }  }
247    
248    
249    
250  /* Character string printing function. A "normal" and a UTF-8 version. */  /*************************************************
251    *              Callout function                  *
252    *************************************************/
253    
254    /* Called from PCRE as a result of the (?C) item. We print out where we are in
255    the match. Yield zero unless more callouts than the fail count, or the callout
256    data is not zero. */
257    
258  static void pchars(unsigned char *p, int length, int utf8)  static int callout(pcre_callout_block *cb)
259  {  {
260  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
261  while (length-- > 0)  int i, pre_start, post_start, subject_length;
262    
263    if (callout_extra)
264    {    {
265    if (utf8)    fprintf(f, "Callout %d: last capture = %d\n",
266        cb->callout_number, cb->capture_last);
267    
268      for (i = 0; i < cb->capture_top * 2; i += 2)
269      {      {
270      int rc = utf82ord(p, &c);      if (cb->offset_vector[i] < 0)
271      if (rc > 0)        fprintf(f, "%2d: <unset>\n", i/2);
272        else
273        {        {
274        length -= rc - 1;        fprintf(f, "%2d: ", i/2);
275        p += rc;        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
276        if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
277          else fprintf(outfile, "\\x{%02x}", c);        fprintf(f, "\n");
       continue;  
278        }        }
279      }      }
280      }
281    
282     /* Not UTF-8, or malformed UTF-8  */  /* Re-print the subject in canonical form, the first time or if giving full
283    details. On subsequent calls in the same match, we use pchars just to find the
284    printed lengths of the substrings. */
285    
286    if (f != NULL) fprintf(f, "--->");
287    
288    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
289    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
290      cb->current_position - cb->start_match, f);
291    
292    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
293    
294    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
295      cb->subject_length - cb->current_position, f);
296    
297    if (f != NULL) fprintf(f, "\n");
298    
299    /* Always print appropriate indicators, with callout number if not already
300    shown. For automatic callouts, show the pattern offset. */
301    
302    if (cb->callout_number == 255)
303      {
304      fprintf(outfile, "%+3d ", cb->pattern_position);
305      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
306      }
307    else
308      {
309      if (callout_extra) fprintf(outfile, "    ");
310        else fprintf(outfile, "%3d ", cb->callout_number);
311      }
312    
313    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
314    fprintf(outfile, "^");
315    
316    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  if (post_start > 0)
317      else fprintf(outfile, "\\x%02x", c);    {
318      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
319      fprintf(outfile, "^");
320    }    }
321    
322    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
323      fprintf(outfile, " ");
324    
325    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
326      pbuffer + cb->pattern_position);
327    
328    fprintf(outfile, "\n");
329    first_callout = 0;
330    
331    if (cb->callout_data != NULL)
332      {
333      int callout_data = *((int *)(cb->callout_data));
334      if (callout_data != 0)
335        {
336        fprintf(outfile, "Callout data = %d\n", callout_data);
337        return callout_data;
338        }
339      }
340    
341    return (cb->callout_number != callout_fail_id)? 0 :
342           (++callout_count >= callout_fail_count)? 1 : 0;
343  }  }
344    
345    
346    /*************************************************
347    *            Local malloc functions              *
348    *************************************************/
349    
350  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
351  compiled re. */  compiled re. */
352    
353  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
354  {  {
355    void *block = malloc(size);
356  gotten_store = size;  gotten_store = size;
357  if (log_store)  if (show_malloc)
358    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
359      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
360  return malloc(size);  }
361    
362    static void new_free(void *block)
363    {
364    if (show_malloc)
365      fprintf(outfile, "free             %p\n", block);
366    free(block);
367  }  }
368    
369    
370    /* For recursion malloc/free, to test stacking calls */
371    
372    static void *stack_malloc(size_t size)
373    {
374    void *block = malloc(size);
375    if (show_malloc)
376      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
377    return block;
378    }
379    
380    static void stack_free(void *block)
381    {
382    if (show_malloc)
383      fprintf(outfile, "stack_free       %p\n", block);
384    free(block);
385    }
386    
387    
388    /*************************************************
389    *          Call pcre_fullinfo()                  *
390    *************************************************/
391    
392  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
393    
# Line 419  if ((rc = pcre_fullinfo(re, study, optio Line 400  if ((rc = pcre_fullinfo(re, study, optio
400    
401    
402    
403    /*************************************************
404    *         Byte flipping function                 *
405    *************************************************/
406    
407    static long int
408    byteflip(long int value, int n)
409    {
410    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
411    return ((value & 0x000000ff) << 24) |
412           ((value & 0x0000ff00) <<  8) |
413           ((value & 0x00ff0000) >>  8) |
414           ((value & 0xff000000) >> 24);
415    }
416    
417    
418    
419    
420    /*************************************************
421    *                Main Program                    *
422    *************************************************/
423    
424  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
425  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 435  int showinfo = 0; Line 436  int showinfo = 0;
436  int showstore = 0;  int showstore = 0;
437  int size_offsets = 45;  int size_offsets = 45;
438  int size_offsets_max;  int size_offsets_max;
439  int *offsets;  int *offsets = NULL;
440  #if !defined NOPOSIX  #if !defined NOPOSIX
441  int posix = 0;  int posix = 0;
442  #endif  #endif
443  int debug = 0;  int debug = 0;
444  int done = 0;  int done = 0;
445  unsigned char buffer[30000];  int all_use_dfa = 0;
446  unsigned char dbuffer[1024];  int yield = 0;
447    
448    unsigned char *buffer;
449    unsigned char *dbuffer;
450    
451  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
452    when I am debugging. */
453    
454    buffer = (unsigned char *)malloc(BUFFER_SIZE);
455    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
456    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
457    
458    /* The outfile variable is static so that new_malloc can use it. The _setmode()
459    stuff is some magic that I don't understand, but which apparently does good
460    things in Windows. It's related to line terminations.  */
461    
462    #if defined(_WIN32) || defined(WIN32)
463    _setmode( _fileno( stdout ), 0x8000 );
464    #endif  /* defined(_WIN32) || defined(WIN32) */
465    
466  outfile = stdout;  outfile = stdout;
467    
# Line 452  outfile = stdout; Line 469  outfile = stdout;
469    
470  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
471    {    {
472    char *endptr;    unsigned char *endptr;
473    
474    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
475      showstore = 1;      showstore = 1;
476    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
477    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
478    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
479    #if !defined NODFA
480      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
481    #endif
482    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
483        ((size_offsets = strtoul(argv[op+1], &endptr, 10)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
484            *endptr == 0))
485      {      {
486      op++;      op++;
487      argc--;      argc--;
# Line 468  while (argc > 1 && argv[op][0] == '-') Line 489  while (argc > 1 && argv[op][0] == '-')
489  #if !defined NOPOSIX  #if !defined NOPOSIX
490    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
491  #endif  #endif
492      else if (strcmp(argv[op], "-C") == 0)
493        {
494        int rc;
495        printf("PCRE version %s\n", pcre_version());
496        printf("Compiled with\n");
497        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
498        printf("  %sUTF-8 support\n", rc? "" : "No ");
499        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
500        printf("  %sUnicode properties support\n", rc? "" : "No ");
501        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
502        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
503        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
504        printf("  Internal link size = %d\n", rc);
505        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
506        printf("  POSIX malloc threshold = %d\n", rc);
507        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
508        printf("  Default match limit = %d\n", rc);
509        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
510        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
511        exit(0);
512        }
513    else    else
514      {      {
515      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
516      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
517      printf("  -d     debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
518             "  -i     show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n");
519    #if !defined NODFA
520        printf("  -dfa   force DFA matching for all subjects\n");
521    #endif
522        printf("  -i     show information about compiled pattern\n"
523               "  -m     output memory used information\n"
524             "  -o <n> set size of offsets vector to <n>\n");             "  -o <n> set size of offsets vector to <n>\n");
525  #if !defined NOPOSIX  #if !defined NOPOSIX
526      printf("  -p     use POSIX interface\n");      printf("  -p     use POSIX interface\n");
527  #endif  #endif
528      printf("  -s     output store information\n"      printf("  -s     output store (memory) used information\n"
529             "  -t     time compilation and execution\n");             "  -t     time compilation and execution\n");
530      return 1;      yield = 1;
531        goto EXIT;
532      }      }
533    op++;    op++;
534    argc--;    argc--;
# Line 489  while (argc > 1 && argv[op][0] == '-') Line 537  while (argc > 1 && argv[op][0] == '-')
537  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
538    
539  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
540  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
541  if (offsets == NULL)  if (offsets == NULL)
542    {    {
543    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
544      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
545    return 1;    yield = 1;
546      goto EXIT;
547    }    }
548    
549  /* Sort out the input and output files */  /* Sort out the input and output files */
550    
551  if (argc > 1)  if (argc > 1)
552    {    {
553    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
554    if (infile == NULL)    if (infile == NULL)
555      {      {
556      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
557      return 1;      yield = 1;
558        goto EXIT;
559      }      }
560    }    }
561    
562  if (argc > 2)  if (argc > 2)
563    {    {
564    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
565    if (outfile == NULL)    if (outfile == NULL)
566      {      {
567      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
568      return 1;      yield = 1;
569        goto EXIT;
570      }      }
571    }    }
572    
573  /* Set alternative malloc function */  /* Set alternative malloc function */
574    
575  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
576    pcre_free = new_free;
577    pcre_stack_malloc = stack_malloc;
578    pcre_stack_free = stack_free;
579    
580  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
581    
# Line 541  while (!done) Line 595  while (!done)
595    
596    const char *error;    const char *error;
597    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
598      unsigned char *to_file = NULL;
599    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
600      unsigned long int true_size, true_study_size = 0;
601      size_t size, regex_gotten_store;
602    int do_study = 0;    int do_study = 0;
603    int do_debug = debug;    int do_debug = debug;
604    int do_G = 0;    int do_G = 0;
605    int do_g = 0;    int do_g = 0;
606    int do_showinfo = showinfo;    int do_showinfo = showinfo;
607    int do_showrest = 0;    int do_showrest = 0;
608    int utf8 = 0;    int do_flip = 0;
609    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
610    
611      use_utf8 = 0;
612    
613    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
614    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
615    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
616      fflush(outfile);
617    
618    p = buffer;    p = buffer;
619    while (isspace(*p)) p++;    while (isspace(*p)) p++;
620    if (*p == 0) continue;    if (*p == 0) continue;
621    
622    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
623    complete, read more. */  
624      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
625        {
626        unsigned long int magic;
627        uschar sbuf[8];
628        FILE *f;
629    
630        p++;
631        pp = p + (int)strlen((char *)p);
632        while (isspace(pp[-1])) pp--;
633        *pp = 0;
634    
635        f = fopen((char *)p, "rb");
636        if (f == NULL)
637          {
638          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
639          continue;
640          }
641    
642        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
643    
644        true_size =
645          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
646        true_study_size =
647          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
648    
649        re = (real_pcre *)new_malloc(true_size);
650        regex_gotten_store = gotten_store;
651    
652        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
653    
654        magic = ((real_pcre *)re)->magic_number;
655        if (magic != MAGIC_NUMBER)
656          {
657          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
658            {
659            do_flip = 1;
660            }
661          else
662            {
663            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
664            fclose(f);
665            continue;
666            }
667          }
668    
669        fprintf(outfile, "Compiled regex%s loaded from %s\n",
670          do_flip? " (byte-inverted)" : "", p);
671    
672        /* Need to know if UTF-8 for printing data strings */
673    
674        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
675        use_utf8 = (options & PCRE_UTF8) != 0;
676    
677        /* Now see if there is any following study data */
678    
679        if (true_study_size != 0)
680          {
681          pcre_study_data *psd;
682    
683          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
684          extra->flags = PCRE_EXTRA_STUDY_DATA;
685    
686          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
687          extra->study_data = psd;
688    
689          if (fread(psd, 1, true_study_size, f) != true_study_size)
690            {
691            FAIL_READ:
692            fprintf(outfile, "Failed to read data from %s\n", p);
693            if (extra != NULL) new_free(extra);
694            if (re != NULL) new_free(re);
695            fclose(f);
696            continue;
697            }
698          fprintf(outfile, "Study data loaded from %s\n", p);
699          do_study = 1;     /* To get the data output if requested */
700          }
701        else fprintf(outfile, "No study data\n");
702    
703        fclose(f);
704        goto SHOW_INFO;
705        }
706    
707      /* In-line pattern (the usual case). Get the delimiter and seek the end of
708      the pattern; if it isn't complete, read more. */
709    
710    delimiter = *p++;    delimiter = *p++;
711    
# Line 582  while (!done) Line 727  while (!done)
727        }        }
728      if (*pp != 0) break;      if (*pp != 0) break;
729    
730      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
731      if (len < 256)      if (len < 256)
732        {        {
733        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 605  while (!done) Line 750  while (!done)
750    
751    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
752    
753    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
754      for callouts. */
755    
756    *pp++ = 0;    *pp++ = 0;
757      strcpy((char *)pbuffer, (char *)p);
758    
759    /* Look for options after final delimiter */    /* Look for options after final delimiter */
760    
# Line 619  while (!done) Line 766  while (!done)
766      {      {
767      switch (*pp++)      switch (*pp++)
768        {        {
769          case 'f': options |= PCRE_FIRSTLINE; break;
770        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
771        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
772        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 627  while (!done) Line 775  while (!done)
775    
776        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
777        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
778          case 'C': options |= PCRE_AUTO_CALLOUT; break;
779        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
780        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
781          case 'F': do_flip = 1; break;
782        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
783        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
784        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
785          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
786    
787  #if !defined NOPOSIX  #if !defined NOPOSIX
788        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 640  while (!done) Line 791  while (!done)
791        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
792        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
793        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
794        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
795          case '?': options |= PCRE_NO_UTF8_CHECK; break;
796    
797        case 'L':        case 'L':
798        ppp = pp;        ppp = pp;
799        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
800          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
801        *ppp = 0;        *ppp = 0;
802        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
803          {          {
# Line 655  while (!done) Line 808  while (!done)
808        pp = ppp;        pp = ppp;
809        break;        break;
810    
811        case '\n': case ' ': break;        case '>':
812          to_file = pp;
813          while (*pp != 0) pp++;
814          while (isspace(pp[-1])) pp--;
815          *pp = 0;
816          break;
817    
818          case '\r':                      /* So that it works in Windows */
819          case '\n':
820          case ' ':
821          break;
822    
823        default:        default:
824        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
825        goto SKIP_DATA;        goto SKIP_DATA;
# Line 671  while (!done) Line 835  while (!done)
835      {      {
836      int rc;      int rc;
837      int cflags = 0;      int cflags = 0;
838    
839      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
840      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
841        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
842      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
843    
844      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 680  while (!done) Line 846  while (!done)
846    
847      if (rc != 0)      if (rc != 0)
848        {        {
849        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
850        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
851        goto SKIP_DATA;        goto SKIP_DATA;
852        }        }
# Line 704  while (!done) Line 870  while (!done)
870          }          }
871        time_taken = clock() - start_time;        time_taken = clock() - start_time;
872        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
873          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
874          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
875        }        }
876    
877      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 721  while (!done) Line 887  while (!done)
887          {          {
888          for (;;)          for (;;)
889            {            {
890            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
891              {              {
892              done = 1;              done = 1;
893              goto CONTINUE;              goto CONTINUE;
# Line 739  while (!done) Line 905  while (!done)
905      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
906      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
907    
908        if (log_store)
909          fprintf(outfile, "Memory allocation (code space): %d\n",
910            (int)(gotten_store -
911                  sizeof(real_pcre) -
912                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
913    
914        /* Extract the size for possible writing before possibly flipping it,
915        and remember the store that was got. */
916    
917        true_size = ((real_pcre *)re)->size;
918        regex_gotten_store = gotten_store;
919    
920        /* If /S was present, study the regexp to generate additional info to
921        help with the matching. */
922    
923        if (do_study)
924          {
925          if (timeit)
926            {
927            register int i;
928            clock_t time_taken;
929            clock_t start_time = clock();
930            for (i = 0; i < LOOPREPEAT; i++)
931              extra = pcre_study(re, study_options, &error);
932            time_taken = clock() - start_time;
933            if (extra != NULL) free(extra);
934            fprintf(outfile, "  Study time %.3f milliseconds\n",
935              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
936                (double)CLOCKS_PER_SEC);
937            }
938          extra = pcre_study(re, study_options, &error);
939          if (error != NULL)
940            fprintf(outfile, "Failed to study: %s\n", error);
941          else if (extra != NULL)
942            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
943          }
944    
945        /* If the 'F' option was present, we flip the bytes of all the integer
946        fields in the regex data block and the study block. This is to make it
947        possible to test PCRE's handling of byte-flipped patterns, e.g. those
948        compiled on a different architecture. */
949    
950        if (do_flip)
951          {
952          real_pcre *rre = (real_pcre *)re;
953          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
954          rre->size = byteflip(rre->size, sizeof(rre->size));
955          rre->options = byteflip(rre->options, sizeof(rre->options));
956          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
957          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
958          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
959          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
960          rre->name_table_offset = byteflip(rre->name_table_offset,
961            sizeof(rre->name_table_offset));
962          rre->name_entry_size = byteflip(rre->name_entry_size,
963            sizeof(rre->name_entry_size));
964          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
965    
966          if (extra != NULL)
967            {
968            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
969            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
970            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
971            }
972          }
973    
974        /* Extract information from the compiled data if required */
975    
976        SHOW_INFO:
977    
978      if (do_showinfo)      if (do_showinfo)
979        {        {
980        unsigned long int get_options;        unsigned long int get_options, all_options;
981    #if !defined NOINFOCHECK
982        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
983    #endif
984        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
985        size_t size;        int nameentrysize, namecount;
986          const uschar *nametable;
987    
988        if (do_debug) print_internals(re);        if (do_debug)
989            {
990            fprintf(outfile, "------------------------------------------------------------------\n");
991            _pcre_printint(re, outfile);
992            }
993    
994        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
995        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
996        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
997        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
998        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
999        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1000          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1001          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1002          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1003    
1004    #if !defined NOINFOCHECK
1005        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1006        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1007          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 772  while (!done) Line 1019  while (!done)
1019            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1020              get_options, old_options);              get_options, old_options);
1021          }          }
1022    #endif
1023    
1024        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1025          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1026          size, gotten_store);          (int)size, (int)regex_gotten_store);
1027    
1028        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1029        if (backrefmax > 0)        if (backrefmax > 0)
1030          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
1031    
1032          if (namecount > 0)
1033            {
1034            fprintf(outfile, "Named capturing subpatterns:\n");
1035            while (namecount-- > 0)
1036              {
1037              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1038                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1039                GET2(nametable, 0));
1040              nametable += nameentrysize;
1041              }
1042            }
1043    
1044          /* The NOPARTIAL bit is a private bit in the options, so we have
1045          to fish it out via our back door */
1046    
1047          all_options = ((real_pcre *)re)->options;
1048          if (do_flip)
1049            {
1050            all_options = byteflip(all_options, sizeof(all_options));
1051            }
1052    
1053          if ((all_options & PCRE_NOPARTIAL) != 0)
1054            fprintf(outfile, "Partial matching not supported\n");
1055    
1056        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1057          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1058            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1059            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1060            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1061            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1062              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1063            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1064            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1065            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1066            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1067            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1068              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1069    
1070        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1071          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 805  while (!done) Line 1080  while (!done)
1080          }          }
1081        else        else
1082          {          {
1083          if (isprint(first_char))          int ch = first_char & 255;
1084            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1085              "" : " (caseless)";
1086            if (isprint(ch))
1087              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1088          else          else
1089            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1090          }          }
1091    
1092        if (need_char < 0)        if (need_char < 0)
# Line 817  while (!done) Line 1095  while (!done)
1095          }          }
1096        else        else
1097          {          {
1098          if (isprint(need_char))          int ch = need_char & 255;
1099            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1100              "" : " (caseless)";
1101            if (isprint(ch))
1102              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1103          else          else
1104            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1105          }          }
       }  
1106    
1107      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1108      help with the matching. */        value, but it varies, depending on the computer architecture, and
1109          so messes up the test suite. (And with the /F option, it might be
1110          flipped.) */
1111    
1112      if (do_study)        if (do_study)
       {  
       if (timeit)  
1113          {          {
1114          register int i;          if (extra == NULL)
1115          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1116          clock_t start_time = clock();          else
1117          for (i = 0; i < LOOPREPEAT; i++)            {
1118            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1119          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1120          if (extra != NULL) free(extra);  
1121          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1122            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1123            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1124                {
1125                int i;
1126                int c = 24;
1127                fprintf(outfile, "Starting byte set: ");
1128                for (i = 0; i < 256; i++)
1129                  {
1130                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1131                    {
1132                    if (c > 75)
1133                      {
1134                      fprintf(outfile, "\n  ");
1135                      c = 2;
1136                      }
1137                    if (isprint(i) && i != ' ')
1138                      {
1139                      fprintf(outfile, "%c ", i);
1140                      c += 2;
1141                      }
1142                    else
1143                      {
1144                      fprintf(outfile, "\\x%02x ", i);
1145                      c += 5;
1146                      }
1147                    }
1148                  }
1149                fprintf(outfile, "\n");
1150                }
1151              }
1152          }          }
1153          }
1154    
1155        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1156        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1157          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1158    
1159        else if (do_showinfo)      if (to_file != NULL)
1160          {
1161          FILE *f = fopen((char *)to_file, "wb");
1162          if (f == NULL)
1163          {          {
1164          uschar *start_bits = NULL;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1165          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          }
1166          if (start_bits == NULL)        else
1167            fprintf(outfile, "No starting character set\n");          {
1168            uschar sbuf[8];
1169            sbuf[0] = (true_size >> 24)  & 255;
1170            sbuf[1] = (true_size >> 16)  & 255;
1171            sbuf[2] = (true_size >>  8)  & 255;
1172            sbuf[3] = (true_size)  & 255;
1173    
1174            sbuf[4] = (true_study_size >> 24)  & 255;
1175            sbuf[5] = (true_study_size >> 16)  & 255;
1176            sbuf[6] = (true_study_size >>  8)  & 255;
1177            sbuf[7] = (true_study_size)  & 255;
1178    
1179            if (fwrite(sbuf, 1, 8, f) < 8 ||
1180                fwrite(re, 1, true_size, f) < true_size)
1181              {
1182              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1183              }
1184          else          else
1185            {            {
1186            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1187            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1188              {              {
1189              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1190                    true_study_size)
1191                {                {
1192                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1193                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1194                }                }
1195                else fprintf(outfile, "Study data written to %s\n", to_file);
1196              }              }
           fprintf(outfile, "\n");  
1197            }            }
1198            fclose(f);
1199          }          }
1200    
1201          new_free(re);
1202          if (extra != NULL) new_free(extra);
1203          if (tables != NULL) new_free((void *)tables);
1204          continue;  /* With next regex */
1205        }        }
1206      }      }        /* End of non-POSIX compile */
1207    
1208    /* Read data lines and test them */    /* Read data lines and test them */
1209    
# Line 893  while (!done) Line 1211  while (!done)
1211      {      {
1212      unsigned char *q;      unsigned char *q;
1213      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
1214        int *use_offsets = offsets;
1215      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1216        int callout_data = 0;
1217        int callout_data_set = 0;
1218      int count, c;      int count, c;
1219      int copystrings = 0;      int copystrings = 0;
1220        int find_match_limit = 0;
1221      int getstrings = 0;      int getstrings = 0;
1222      int getlist = 0;      int getlist = 0;
1223      int gmatched = 0;      int gmatched = 0;
1224      int start_offset = 0;      int start_offset = 0;
1225      int g_notempty = 0;      int g_notempty = 0;
1226        int use_dfa = 0;
1227    
1228      options = 0;      options = 0;
1229    
1230        pcre_callout = callout;
1231        first_callout = 1;
1232        callout_extra = 0;
1233        callout_count = 0;
1234        callout_fail_count = 999999;
1235        callout_fail_id = -1;
1236        show_malloc = 0;
1237    
1238      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1239      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1240        {        {
1241        done = 1;        done = 1;
1242        goto CONTINUE;        goto CONTINUE;
# Line 925  while (!done) Line 1256  while (!done)
1256        {        {
1257        int i = 0;        int i = 0;
1258        int n = 0;        int n = 0;
1259    
1260        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1261          {          {
1262          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 947  while (!done) Line 1279  while (!done)
1279    
1280          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1281    
1282    #if !defined NOUTF8
1283          if (*p == '{')          if (*p == '{')
1284            {            {
1285            unsigned char *pt = p;            unsigned char *pt = p;
# Line 955  while (!done) Line 1288  while (!done)
1288              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1289            if (*pt == '}')            if (*pt == '}')
1290              {              {
1291              unsigned char buffer[8];              unsigned char buff8[8];
1292              int ii, utn;              int ii, utn;
1293              utn = ord2utf8(c, buffer);              utn = _pcre_ord2utf8(c, buff8);
1294              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1295              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1296              p = pt + 1;              p = pt + 1;
1297              break;              break;
1298              }              }
1299            /* Not correct form; fall through */            /* Not correct form; fall through */
1300            }            }
1301    #endif
1302    
1303          /* Ordinary \x */          /* Ordinary \x */
1304    
# Line 976  while (!done) Line 1310  while (!done)
1310            }            }
1311          break;          break;
1312    
1313          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1314          p--;          p--;
1315          continue;          continue;
1316    
1317            case '>':
1318            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1319            continue;
1320    
1321          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1322          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1323          continue;          continue;
# Line 989  while (!done) Line 1327  while (!done)
1327          continue;          continue;
1328    
1329          case 'C':          case 'C':
1330          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1331          copystrings |= 1 << n;            {
1332              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1333              copystrings |= 1 << n;
1334              }
1335            else if (isalnum(*p))
1336              {
1337              uschar name[256];
1338              uschar *npp = name;
1339              while (isalnum(*p)) *npp++ = *p++;
1340              *npp = 0;
1341              n = pcre_get_stringnumber(re, (char *)name);
1342              if (n < 0)
1343                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1344              else copystrings |= 1 << n;
1345              }
1346            else if (*p == '+')
1347              {
1348              callout_extra = 1;
1349              p++;
1350              }
1351            else if (*p == '-')
1352              {
1353              pcre_callout = NULL;
1354              p++;
1355              }
1356            else if (*p == '!')
1357              {
1358              callout_fail_id = 0;
1359              p++;
1360              while(isdigit(*p))
1361                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1362              callout_fail_count = 0;
1363              if (*p == '!')
1364                {
1365                p++;
1366                while(isdigit(*p))
1367                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1368                }
1369              }
1370            else if (*p == '*')
1371              {
1372              int sign = 1;
1373              callout_data = 0;
1374              if (*(++p) == '-') { sign = -1; p++; }
1375              while(isdigit(*p))
1376                callout_data = callout_data * 10 + *p++ - '0';
1377              callout_data *= sign;
1378              callout_data_set = 1;
1379              }
1380          continue;          continue;
1381    
1382    #if !defined NODFA
1383            case 'D':
1384    #if !defined NOPOSIX
1385            if (posix || do_posix)
1386              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1387            else
1388    #endif
1389              use_dfa = 1;
1390            continue;
1391    
1392            case 'F':
1393            options |= PCRE_DFA_SHORTEST;
1394            continue;
1395    #endif
1396    
1397          case 'G':          case 'G':
1398          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1399          getstrings |= 1 << n;            {
1400              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1401              getstrings |= 1 << n;
1402              }
1403            else if (isalnum(*p))
1404              {
1405              uschar name[256];
1406              uschar *npp = name;
1407              while (isalnum(*p)) *npp++ = *p++;
1408              *npp = 0;
1409              n = pcre_get_stringnumber(re, (char *)name);
1410              if (n < 0)
1411                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1412              else getstrings |= 1 << n;
1413              }
1414          continue;          continue;
1415    
1416          case 'L':          case 'L':
1417          getlist = 1;          getlist = 1;
1418          continue;          continue;
1419    
1420            case 'M':
1421            find_match_limit = 1;
1422            continue;
1423    
1424          case 'N':          case 'N':
1425          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1426          continue;          continue;
# Line 1010  while (!done) Line 1429  while (!done)
1429          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1430          if (n > size_offsets_max)          if (n > size_offsets_max)
1431            {            {
           free(offsets);  
1432            size_offsets_max = n;            size_offsets_max = n;
1433            offsets = malloc(size_offsets_max * sizeof(int));            free(offsets);
1434              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1435            if (offsets == NULL)            if (offsets == NULL)
1436              {              {
1437              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1438                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1439              return 1;              yield = 1;
1440                goto EXIT;
1441              }              }
1442            }            }
1443          use_size_offsets = n;          use_size_offsets = n;
1444            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1445            continue;
1446    
1447            case 'P':
1448            options |= PCRE_PARTIAL;
1449            continue;
1450    
1451    #if !defined NODFA
1452            case 'R':
1453            options |= PCRE_DFA_RESTART;
1454            continue;
1455    #endif
1456    
1457            case 'S':
1458            show_malloc = 1;
1459          continue;          continue;
1460    
1461          case 'Z':          case 'Z':
1462          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1463          continue;          continue;
1464    
1465            case '?':
1466            options |= PCRE_NO_UTF8_CHECK;
1467            continue;
1468          }          }
1469        *q++ = c;        *q++ = c;
1470        }        }
1471      *q = 0;      *q = 0;
1472      len = q - dbuffer;      len = q - dbuffer;
1473    
1474        if ((all_use_dfa || use_dfa) && find_match_limit)
1475          {
1476          printf("**Match limit not relevant for DFA matching: ignored\n");
1477          find_match_limit = 0;
1478          }
1479    
1480      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1481      support timing. */      support timing or playing with the match limit or callout data. */
1482    
1483  #if !defined NOPOSIX  #if !defined NOPOSIX
1484      if (posix || do_posix)      if (posix || do_posix)
1485        {        {
1486        int rc;        int rc;
1487        int eflags = 0;        int eflags = 0;
1488        regmatch_t *pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);        regmatch_t *pmatch = NULL;
1489          if (use_size_offsets > 0)
1490            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1491        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1492        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1493    
# Line 1048  while (!done) Line 1495  while (!done)
1495    
1496        if (rc != 0)        if (rc != 0)
1497          {          {
1498          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1499          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1500          }          }
1501        else        else
1502          {          {
1503          size_t i;          size_t i;
1504          for (i = 0; i < use_size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1505            {            {
1506            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1507              {              {
1508              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1509              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1510                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1511              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1512              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1513                {                {
1514                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1515                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1516                    outfile);
1517                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1518                }                }
1519              }              }
# Line 1086  while (!done) Line 1534  while (!done)
1534          register int i;          register int i;
1535          clock_t time_taken;          clock_t time_taken;
1536          clock_t start_time = clock();          clock_t start_time = clock();
1537    
1538    #if !defined NODFA
1539            if (all_use_dfa || use_dfa)
1540              {
1541              int workspace[1000];
1542              for (i = 0; i < LOOPREPEAT; i++)
1543                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1544                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1545                  sizeof(workspace)/sizeof(int));
1546              }
1547            else
1548    #endif
1549    
1550          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1551            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1552              start_offset, options | g_notempty, offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1553    
1554          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1555          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1556            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1557            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1558          }          }
1559    
1560        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1561          start_offset, options | g_notempty, offsets, use_size_offsets);        varying limits in order to find the minimum value. */
1562    
1563        if (count == 0)        if (find_match_limit)
1564          {          {
1565          fprintf(outfile, "Matched, but too many substrings\n");          int min = 0;
1566          count = use_size_offsets/3;          int mid = 64;
1567            int max = -1;
1568    
1569            if (extra == NULL)
1570              {
1571              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1572              extra->flags = 0;
1573              }
1574            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1575    
1576            for (;;)
1577              {
1578              extra->match_limit = mid;
1579              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1580                options | g_notempty, use_offsets, use_size_offsets);
1581              if (count == PCRE_ERROR_MATCHLIMIT)
1582                {
1583                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1584                min = mid;
1585                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1586                }
1587              else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1588                                     count == PCRE_ERROR_PARTIAL)
1589                {
1590                if (mid == min + 1)
1591                  {
1592                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1593                  break;
1594                  }
1595                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1596                max = mid;
1597                mid = (min + mid)/2;
1598                }
1599              else break;    /* Some other error */
1600              }
1601    
1602            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1603            }
1604    
1605          /* If callout_data is set, use the interface with additional data */
1606    
1607          else if (callout_data_set)
1608            {
1609            if (extra == NULL)
1610              {
1611              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1612              extra->flags = 0;
1613              }
1614            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1615            extra->callout_data = &callout_data;
1616            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1617              options | g_notempty, use_offsets, use_size_offsets);
1618            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1619            }
1620    
1621          /* The normal case is just to do the match once, with the default
1622          value of match_limit. */
1623    
1624    #if !defined NODFA
1625          else if (all_use_dfa || use_dfa)
1626            {
1627            int workspace[1000];
1628            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1629              options | g_notempty, use_offsets, use_size_offsets, workspace,
1630              sizeof(workspace)/sizeof(int));
1631            if (count == 0)
1632              {
1633              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1634              count = use_size_offsets/2;
1635              }
1636            }
1637    #endif
1638    
1639          else
1640            {
1641            count = pcre_exec(re, extra, (char *)bptr, len,
1642              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1643            if (count == 0)
1644              {
1645              fprintf(outfile, "Matched, but too many substrings\n");
1646              count = use_size_offsets/3;
1647              }
1648          }          }
1649    
1650        /* Matched */        /* Matched */
# Line 1111  while (!done) Line 1654  while (!done)
1654          int i;          int i;
1655          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1656            {            {
1657            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1658              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1659            else            else
1660              {              {
1661              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1662              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);              (void)pchars(bptr + use_offsets[i],
1663                  use_offsets[i+1] - use_offsets[i], outfile);
1664              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1665              if (i == 0)              if (i == 0)
1666                {                {
1667                if (do_showrest)                if (do_showrest)
1668                  {                  {
1669                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1670                  pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1671                      outfile);
1672                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1673                  }                  }
1674                }                }
# Line 1135  while (!done) Line 1680  while (!done)
1680            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1681              {              {
1682              char copybuffer[16];              char copybuffer[16];
1683              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1684                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1685              if (rc < 0)              if (rc < 0)
1686                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 1149  while (!done) Line 1694  while (!done)
1694            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1695              {              {
1696              const char *substring;              const char *substring;
1697              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1698                i, &substring);                i, &substring);
1699              if (rc < 0)              if (rc < 0)
1700                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
# Line 1165  while (!done) Line 1710  while (!done)
1710          if (getlist)          if (getlist)
1711            {            {
1712            const char **stringlist;            const char **stringlist;
1713            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1714              &stringlist);              &stringlist);
1715            if (rc < 0)            if (rc < 0)
1716              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 1181  while (!done) Line 1726  while (!done)
1726            }            }
1727          }          }
1728    
1729          /* There was a partial match */
1730    
1731          else if (count == PCRE_ERROR_PARTIAL)
1732            {
1733            fprintf(outfile, "Partial match");
1734    #if !defined NODFA
1735            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1736              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1737                bptr + use_offsets[0]);
1738    #endif
1739            fprintf(outfile, "\n");
1740            break;  /* Out of the /g loop */
1741            }
1742    
1743        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1744        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1745        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1746        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1747        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
1748          that was checked before setting g_notempty. */
1749    
1750        else        else
1751          {          {
1752          if (g_notempty != 0)          if (g_notempty != 0)
1753            {            {
1754            offsets[0] = start_offset;            int onechar = 1;
1755            offsets[1] = start_offset + 1;            use_offsets[0] = start_offset;
1756              if (use_utf8)
1757                {
1758                while (start_offset + onechar < len)
1759                  {
1760                  int tb = bptr[start_offset+onechar];
1761                  if (tb <= 127) break;
1762                  tb &= 0xc0;
1763                  if (tb != 0 && tb != 0xc0) onechar++;
1764                  }
1765                }
1766              use_offsets[1] = start_offset + onechar;
1767            }            }
1768          else          else
1769            {            {
1770            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1771              {              {
1772              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1773              }              }
1774              else fprintf(outfile, "Error %d\n", count);
1775            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1776            }            }
1777          }          }
# Line 1217  while (!done) Line 1788  while (!done)
1788        character. */        character. */
1789    
1790        g_notempty = 0;        g_notempty = 0;
1791        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
1792          {          {
1793          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
1794          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1795          }          }
1796    
1797        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1798    
1799        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
1800    
1801        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
1802    
1803        else        else
1804          {          {
1805          bptr += offsets[1];          bptr += use_offsets[1];
1806          len -= offsets[1];          len -= use_offsets[1];
1807          }          }
1808        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
1809      }    /* End of loop for data lines */      }    /* End of loop for data lines */
# Line 1243  while (!done) Line 1814  while (!done)
1814    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1815  #endif  #endif
1816    
1817    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1818    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1819    if (tables != NULL)    if (tables != NULL)
1820      {      {
1821      free((void *)tables);      new_free((void *)tables);
1822      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1823      }      }
1824    }    }
1825    
1826  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1827  return 0;  
1828    EXIT:
1829    
1830    if (infile != NULL && infile != stdin) fclose(infile);
1831    if (outfile != NULL && outfile != stdout) fclose(outfile);
1832    
1833    free(buffer);
1834    free(dbuffer);
1835    free(pbuffer);
1836    free(offsets);
1837    
1838    return yield;
1839  }  }
1840    
1841  /* End */  /* End of pcretest.c */

Legend:
Removed from v.53  
changed lines
  Added in v.79

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12