/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 29 by nigel, Sat Feb 24 21:38:53 2007 UTC revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    #define PCRE_SPY        /* For Win32 build, import data, not export */
48    
49    /* We include pcre_internal.h because we need the internal info for displaying
50    the results of pcre_study() and we also need to know about the internal
51    macros, structures, and other internal data values; pcretest has "inside
52    information" compared to a program that strictly follows the PCRE API. */
53    
54    #include "pcre_internal.h"
55    
56    /* We need access to the data tables that PCRE uses. So as not to have to keep
57    two copies, we include the source file here, changing the names of the external
58    symbols to prevent clashes. */
59    
60    #define _pcre_utf8_table1      utf8_table1
61    #define _pcre_utf8_table1_size utf8_table1_size
62    #define _pcre_utf8_table2      utf8_table2
63    #define _pcre_utf8_table3      utf8_table3
64    #define _pcre_utf8_table4      utf8_table4
65    #define _pcre_utt              utt
66    #define _pcre_utt_size         utt_size
67    #define _pcre_OP_lengths       OP_lengths
68    
69    #include "pcre_tables.c"
70    
71  /* Use the internal info for displaying the results of pcre_study(). */  /* We also need the pcre_printint() function for printing out compiled
72    patterns. This function is in a separate file so that it can be included in
73    pcre_compile.c when that module is compiled with debugging enabled. */
74    
75  #include "internal.h"  #include "pcre_printint.src"
76    
77    
78    /* It is possible to compile this test program without including support for
79    testing the POSIX interface, though this is not available via the standard
80    Makefile. */
81    
82    #if !defined NOPOSIX
83  #include "pcreposix.h"  #include "pcreposix.h"
84    #endif
85    
86    /* It is also possible, for the benefit of the version imported into Exim, to
87    build pcretest without support for UTF8 (define NOUTF8), without the interface
88    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89    function (define NOINFOCHECK). */
90    
91    
92    /* Other parameters */
93    
94  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
95  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 99 
99  #endif  #endif
100  #endif  #endif
101    
102  #define LOOPREPEAT 20000  #define LOOPREPEAT 500000
103    
104    #define BUFFER_SIZE 30000
105    #define PBUFFER_SIZE BUFFER_SIZE
106    #define DBUFFER_SIZE BUFFER_SIZE
107    
108    
109    /* Static variables */
110    
111  static FILE *outfile;  static FILE *outfile;
112  static int log_store = 0;  static int log_store = 0;
113    static int callout_count;
114    static int callout_extra;
115    static int callout_fail_count;
116    static int callout_fail_id;
117    static int first_callout;
118    static int show_malloc;
119    static int use_utf8;
120    static size_t gotten_store;
121    
122    static uschar *pbuffer = NULL;
123    
124    
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
125    
126  static const char *OP_names[] = {  /*************************************************
127    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  *          Read number from string               *
128    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  *************************************************/
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
   
   
 static void print_internals(pcre *re, FILE *outfile)  
 {  
 unsigned char *code = ((real_pcre *)re)->code;  
   
 fprintf(outfile, "------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
129    
130          case OP_CRRANGE:  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
131          case OP_CRMINRANGE:  around with conditional compilation, just do the job by hand. It is only used
132          min = (code[1] << 8) + code[2];  for unpicking the -o argument, so just keep it simple.
133          max = (code[3] << 8) + code[4];  
134          if (max == 0) fprintf(outfile, "{%d,}", min);  Arguments:
135          else fprintf(outfile, "{%d,%d}", min, max);    str           string to be converted
136          if (*code == OP_CRMINRANGE) fprintf(outfile, "?");    endptr        where to put the end pointer
137          code += 4;  
138          break;  Returns:        the converted value (an int)
139    */
140    
/* Convert an optionally space-prefixed run of decimal digits to an int.

Arguments:
  str           string to be converted
  endptr        receives a pointer to the first character not consumed

Returns:        the converted value; 0 if no digits were found

Conversion stops before any digit that would take the result past the largest
int, so the accumulation can never overflow (which would be undefined
behaviour). In that case *endptr is left pointing at the unconsumed digit,
which lets a caller that insists on *endptr == 0 reject the over-long number
as malformed. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
const int int_max = (int)(~0u >> 1);   /* largest int, without <limits.h> */
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = *str - '0';
  if (result > (int_max - digit) / 10) break;   /* next step would overflow */
  result = result * 10 + digit;
  str++;
  }

*endptr = str;
return(result);
}
150    
151    
152    
153    
154    /*************************************************
155    *            Convert UTF-8 string to value       *
156    *************************************************/
157    
158    /* This function takes one or more bytes that represents a UTF-8 character,
159    and returns the value of the character.
160    
161    Argument:
162      buffer   a pointer to the byte vector
163      vptr     a pointer to an int to receive the value
164    
165    Returns:   >  0 => the number of bytes consumed
166               -6 to 0 => malformed UTF-8 character at offset = (-return)
167    */
168    
169    #if !defined NOUTF8
170    
171    static int
172    utf82ord(unsigned char *buffer, int *vptr)
173    {
174    int c = *buffer++;
175    int d = c;
176    int i, j, s;
177    
178    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
179      {
180      if ((d & 0x80) == 0) break;
181      d <<= 1;
182      }
183    
184    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
185    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
186    
187    /* i now has a value in the range 1-5 */
188    
189    s = 6*i;
190    d = (c & utf8_table3[i]) << s;
191    
192    for (j = 0; j < i; j++)
193      {
194      c = *buffer++;
195      if ((c & 0xc0) != 0x80) return -(j+1);
196      s -= 6;
197      d |= (c & 0x3f) << s;
198      }
199    
200    /* Check that encoding was the correct unique one */
201    
202    for (j = 0; j < utf8_table1_size; j++)
203      if (d <= utf8_table1[j]) break;
204    if (j != i) return -(i+1);
205    
206    /* Valid value */
207    
208    *vptr = d;
209    return i+1;
210    }
211    
212    #endif
213    
214    
215    
216    /*************************************************
217    *       Convert character value to UTF-8         *
218    *************************************************/
219    
220    /* This function takes an integer value in the range 0 - 0x7fffffff
221    and encodes it as a UTF-8 character in 1 to 6 bytes.
222    
223    Arguments:
224      cvalue     the character value
225      buffer     pointer to buffer for result - at least 6 bytes long
226    
227    Returns:     number of characters placed in the buffer
228    */
229    
230    static int
231    ord2utf8(int cvalue, uschar *buffer)
232    {
233    register int i, j;
234    for (i = 0; i < utf8_table1_size; i++)
235      if (cvalue <= utf8_table1[i]) break;
236    buffer += i;
237    for (j = i; j > 0; j--)
238     {
239     *buffer-- = 0x80 | (cvalue & 0x3f);
240     cvalue >>= 6;
241     }
242    *buffer = utf8_table2[i] | cvalue;
243    return i + 1;
244    }
245    
246          default:  
247          code--;  
248    /*************************************************
249    *             Print character string             *
250    *************************************************/
251    
252    /* Character string printing function. Must handle UTF-8 strings in utf8
253    mode. Yields number of characters printed. If handed a NULL file, just counts
254    chars without printing. */
255    
256    static int pchars(unsigned char *p, int length, FILE *f)
257    {
258    int c = 0;
259    int yield = 0;
260    
261    while (length-- > 0)
262      {
263    #if !defined NOUTF8
264      if (use_utf8)
265        {
266        int rc = utf82ord(p, &c);
267    
268        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
269          {
270          length -= rc - 1;
271          p += rc;
272          if (c < 256 && isprint(c))
273            {
274            if (f != NULL) fprintf(f, "%c", c);
275            yield++;
276            }
277          else
278            {
279            int n;
280            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
281            yield += n;
282          }          }
283          continue;
284        }        }
285      break;      }
286    #endif
287    
288      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
289    
290      default:    if (isprint(c = *(p++)))
291      fprintf(outfile, "    %s", OP_names[*code]);      {
292      break;      if (f != NULL) fprintf(f, "%c", c);
293        yield++;
294        }
295      else
296        {
297        if (f != NULL) fprintf(f, "\\x%02x", c);
298        yield += 4;
299      }      }
   
   code++;  
   fprintf(outfile, "\n");  
300    }    }
301    
302    return yield;
303  }  }
304    
305    
306    
307  /* Character string printing function. */  /*************************************************
308    *              Callout function                  *
309    *************************************************/
310    
311    /* Called from PCRE as a result of the (?C) item. We print out where we are in
312    the match. Yield zero unless more callouts than the fail count, or the callout
313    data is not zero. */
314    
315  static void pchars(unsigned char *p, int length)  static int callout(pcre_callout_block *cb)
316  {  {
317  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
318  while (length-- > 0)  int i, pre_start, post_start, subject_length;
319    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
320      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
321      {
322      fprintf(f, "Callout %d: last capture = %d\n",
323        cb->callout_number, cb->capture_last);
324    
325      for (i = 0; i < cb->capture_top * 2; i += 2)
326        {
327        if (cb->offset_vector[i] < 0)
328          fprintf(f, "%2d: <unset>\n", i/2);
329        else
330          {
331          fprintf(f, "%2d: ", i/2);
332          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
333            cb->offset_vector[i+1] - cb->offset_vector[i], f);
334          fprintf(f, "\n");
335          }
336        }
337      }
338    
339    /* Re-print the subject in canonical form, the first time or if giving full
340    datails. On subsequent calls in the same match, we use pchars just to find the
341    printed lengths of the substrings. */
342    
343    if (f != NULL) fprintf(f, "--->");
344    
345    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
346    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
347      cb->current_position - cb->start_match, f);
348    
349    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
350    
351    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
352      cb->subject_length - cb->current_position, f);
353    
354    if (f != NULL) fprintf(f, "\n");
355    
356    /* Always print appropriate indicators, with callout number if not already
357    shown. For automatic callouts, show the pattern offset. */
358    
359    if (cb->callout_number == 255)
360      {
361      fprintf(outfile, "%+3d ", cb->pattern_position);
362      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
363      }
364    else
365      {
366      if (callout_extra) fprintf(outfile, "    ");
367        else fprintf(outfile, "%3d ", cb->callout_number);
368      }
369    
370    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
371    fprintf(outfile, "^");
372    
373    if (post_start > 0)
374      {
375      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
376      fprintf(outfile, "^");
377      }
378    
379    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
380      fprintf(outfile, " ");
381    
382    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
383      pbuffer + cb->pattern_position);
384    
385    fprintf(outfile, "\n");
386    first_callout = 0;
387    
388    if (cb->callout_data != NULL)
389      {
390      int callout_data = *((int *)(cb->callout_data));
391      if (callout_data != 0)
392        {
393        fprintf(outfile, "Callout data = %d\n", callout_data);
394        return callout_data;
395        }
396      }
397    
398    return (cb->callout_number != callout_fail_id)? 0 :
399           (++callout_count >= callout_fail_count)? 1 : 0;
400  }  }
401    
402    
403    /*************************************************
404    *            Local malloc functions              *
405    *************************************************/
406    
407  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
408  compiled re. */  compiled re. */
409    
410  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
411  {  {
412  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
413  return malloc(size);  gotten_store = size;
414    if (show_malloc)
415      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
416    return block;
417    }
418    
419    static void new_free(void *block)
420    {
421    if (show_malloc)
422      fprintf(outfile, "free             %p\n", block);
423    free(block);
424    }
425    
426    
427    /* For recursion malloc/free, to test stacking calls */
428    
429    static void *stack_malloc(size_t size)
430    {
431    void *block = malloc(size);
432    if (show_malloc)
433      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
434    return block;
435    }
436    
437    static void stack_free(void *block)
438    {
439    if (show_malloc)
440      fprintf(outfile, "stack_free       %p\n", block);
441    free(block);
442  }  }
443    
444    
445    /*************************************************
446    *          Call pcre_fullinfo()                  *
447    *************************************************/
448    
449    /* Get one piece of information from the pcre_fullinfo() function */
450    
451    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
452    {
453    int rc;
454    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
455      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
456    }
457    
458    
459    
460    /*************************************************
461    *         Byte flipping function                 *
462    *************************************************/
463    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the quantity, held in the low-order bytes of a long
  n        2 to swap the low two bytes; any other value swaps the low four

Returns:   the byte-swapped quantity

The shuffling is done on an unsigned long: left-shifting a signed long so that
a bit reaches or passes the sign position is undefined behaviour, which the
<< 24 step could trigger where long is 32 bits wide. Converting the result
back to long is implementation-defined, not undefined, if it exceeds the
largest long. */

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
473    
474    
475    
476    
477    /*************************************************
478    *        Check match or recursion limit          *
479    *************************************************/
480    
481    static int
482    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
483      int start_offset, int options, int *use_offsets, int use_size_offsets,
484      int flag, unsigned long int *limit, int errnumber, const char *msg)
485    {
486    int count;
487    int min = 0;
488    int mid = 64;
489    int max = -1;
490    
491    extra->flags |= flag;
492    
493    for (;;)
494      {
495      *limit = mid;
496    
497      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
498        use_offsets, use_size_offsets);
499    
500      if (count == errnumber)
501        {
502        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
503        min = mid;
504        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
505        }
506    
507      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
508                             count == PCRE_ERROR_PARTIAL)
509        {
510        if (mid == min + 1)
511          {
512          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
513          break;
514          }
515        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
516        max = mid;
517        mid = (min + mid)/2;
518        }
519      else break;    /* Some other error */
520      }
521    
522    extra->flags &= ~flag;
523    return count;
524    }
525    
526    
527    
528    /*************************************************
529    *                Main Program                    *
530    *************************************************/
531    
532  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
533  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 292  int study_options = 0; Line 541  int study_options = 0;
541  int op = 1;  int op = 1;
542  int timeit = 0;  int timeit = 0;
543  int showinfo = 0;  int showinfo = 0;
544    int showstore = 0;
545    int quiet = 0;
546    int size_offsets = 45;
547    int size_offsets_max;
548    int *offsets = NULL;
549    #if !defined NOPOSIX
550  int posix = 0;  int posix = 0;
551    #endif
552  int debug = 0;  int debug = 0;
553  int done = 0;  int done = 0;
554  unsigned char buffer[30000];  int all_use_dfa = 0;
555  unsigned char dbuffer[1024];  int yield = 0;
556    
557  /* Static so that new_malloc can use it. */  unsigned char *buffer;
558    unsigned char *dbuffer;
559    
560    /* Get buffers from malloc() so that Electric Fence will check their misuse
561    when I am debugging. */
562    
563    buffer = (unsigned char *)malloc(BUFFER_SIZE);
564    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
565    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
566    
567    /* The outfile variable is static so that new_malloc can use it. The _setmode()
568    stuff is some magic that I don't understand, but which apparently does good
569    things in Windows. It's related to line terminations.  */
570    
571    #if defined(_WIN32) || defined(WIN32)
572    _setmode( _fileno( stdout ), 0x8000 );
573    #endif  /* defined(_WIN32) || defined(WIN32) */
574    
575  outfile = stdout;  outfile = stdout;
576    
# Line 306  outfile = stdout; Line 578  outfile = stdout;
578    
579  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
580    {    {
581    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
582    
583      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
584        showstore = 1;
585    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
586      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
587    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
588    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
589    #if !defined NODFA
590      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
591    #endif
592      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
593          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
594            *endptr == 0))
595        {
596        op++;
597        argc--;
598        }
599    #if !defined NOPOSIX
600    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
601    #endif
602      else if (strcmp(argv[op], "-C") == 0)
603        {
604        int rc;
605        printf("PCRE version %s\n", pcre_version());
606        printf("Compiled with\n");
607        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
608        printf("  %sUTF-8 support\n", rc? "" : "No ");
609        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
610        printf("  %sUnicode properties support\n", rc? "" : "No ");
611        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
612        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
613        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
614        printf("  Internal link size = %d\n", rc);
615        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
616        printf("  POSIX malloc threshold = %d\n", rc);
617        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
618        printf("  Default match limit = %d\n", rc);
619        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
620        printf("  Default recursion depth limit = %d\n", rc);
621        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
622        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
623        exit(0);
624        }
625    else    else
626      {      {
627      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
628      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
629      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
630             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n");
631             "  -p   use POSIX interface\n"  #if !defined NODFA
632             "  -s   output store information\n"      printf("  -dfa   force DFA matching for all subjects\n");
633             "  -t   time compilation and execution\n");  #endif
634      return 1;      printf("  -i     show information about compiled pattern\n"
635               "  -m     output memory used information\n"
636               "  -o <n> set size of offsets vector to <n>\n");
637    #if !defined NOPOSIX
638        printf("  -p     use POSIX interface\n");
639    #endif
640        printf("  -s     output store (memory) used information\n"
641               "  -t     time compilation and execution\n");
642        yield = 1;
643        goto EXIT;
644      }      }
645    op++;    op++;
646    argc--;    argc--;
647    }    }
648    
649    /* Get the store for the offsets vector, and remember what it was */
650    
651    size_offsets_max = size_offsets;
652    offsets = (int *)malloc(size_offsets_max * sizeof(int));
653    if (offsets == NULL)
654      {
655      printf("** Failed to get %d bytes of memory for offsets vector\n",
656        size_offsets_max * sizeof(int));
657      yield = 1;
658      goto EXIT;
659      }
660    
661  /* Sort out the input and output files */  /* Sort out the input and output files */
662    
663  if (argc > 1)  if (argc > 1)
664    {    {
665    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
666    if (infile == NULL)    if (infile == NULL)
667      {      {
668      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
669      return 1;      yield = 1;
670        goto EXIT;
671      }      }
672    }    }
673    
674  if (argc > 2)  if (argc > 2)
675    {    {
676    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
677    if (outfile == NULL)    if (outfile == NULL)
678      {      {
679      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
680      return 1;      yield = 1;
681        goto EXIT;
682      }      }
683    }    }
684    
685  /* Set alternative malloc function */  /* Set alternative malloc function */
686    
687  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
688    pcre_free = new_free;
689    pcre_stack_malloc = stack_malloc;
690    pcre_stack_free = stack_free;
691    
692  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
693    
694  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
695    
696  /* Main loop */  /* Main loop */
697    
# Line 362  while (!done) Line 699  while (!done)
699    {    {
700    pcre *re = NULL;    pcre *re = NULL;
701    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
702    
703    #if !defined NOPOSIX  /* There are still compilers that require no indent */
704    regex_t preg;    regex_t preg;
705      int do_posix = 0;
706    #endif
707    
708    const char *error;    const char *error;
709    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
710    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
711      const unsigned char *tables = NULL;
712      unsigned long int true_size, true_study_size = 0;
713      size_t size, regex_gotten_store;
714    int do_study = 0;    int do_study = 0;
715    int do_debug = debug;    int do_debug = debug;
716      int do_G = 0;
717      int do_g = 0;
718    int do_showinfo = showinfo;    int do_showinfo = showinfo;
719    int do_posix = 0;    int do_showrest = 0;
720      int do_flip = 0;
721    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
722    
723      use_utf8 = 0;
724    
725    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
726    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
727    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
728      fflush(outfile);
729    
730    p = buffer;    p = buffer;
731    while (isspace(*p)) p++;    while (isspace(*p)) p++;
732    if (*p == 0) continue;    if (*p == 0) continue;
733    
734    /* Get the delimiter and seek the end of the pattern; if it isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
735    complete, read more. */  
736      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
737        {
738        unsigned long int magic;
739        uschar sbuf[8];
740        FILE *f;
741    
742        p++;
743        pp = p + (int)strlen((char *)p);
744        while (isspace(pp[-1])) pp--;
745        *pp = 0;
746    
747        f = fopen((char *)p, "rb");
748        if (f == NULL)
749          {
750          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
751          continue;
752          }
753    
754        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
755    
756        true_size =
757          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
758        true_study_size =
759          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
760    
761        re = (real_pcre *)new_malloc(true_size);
762        regex_gotten_store = gotten_store;
763    
764        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
765    
766        magic = ((real_pcre *)re)->magic_number;
767        if (magic != MAGIC_NUMBER)
768          {
769          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
770            {
771            do_flip = 1;
772            }
773          else
774            {
775            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
776            fclose(f);
777            continue;
778            }
779          }
780    
781        fprintf(outfile, "Compiled regex%s loaded from %s\n",
782          do_flip? " (byte-inverted)" : "", p);
783    
784        /* Need to know if UTF-8 for printing data strings */
785    
786        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
787        use_utf8 = (options & PCRE_UTF8) != 0;
788    
789        /* Now see if there is any following study data */
790    
791        if (true_study_size != 0)
792          {
793          pcre_study_data *psd;
794    
795          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
796          extra->flags = PCRE_EXTRA_STUDY_DATA;
797    
798          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
799          extra->study_data = psd;
800    
801          if (fread(psd, 1, true_study_size, f) != true_study_size)
802            {
803            FAIL_READ:
804            fprintf(outfile, "Failed to read data from %s\n", p);
805            if (extra != NULL) new_free(extra);
806            if (re != NULL) new_free(re);
807            fclose(f);
808            continue;
809            }
810          fprintf(outfile, "Study data loaded from %s\n", p);
811          do_study = 1;     /* To get the data output if requested */
812          }
813        else fprintf(outfile, "No study data\n");
814    
815        fclose(f);
816        goto SHOW_INFO;
817        }
818    
819      /* In-line pattern (the usual case). Get the delimiter and seek the end of
820      the pattern; if it isn't complete, read more. */
821    
822    delimiter = *p++;    delimiter = *p++;
823    
# Line 403  while (!done) Line 839  while (!done)
839        }        }
840      if (*pp != 0) break;      if (*pp != 0) break;
841    
842      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
843      if (len < 256)      if (len < 256)
844        {        {
845        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 426  while (!done) Line 862  while (!done)
862    
863    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
864    
865    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
866      for callouts. */
867    
868    *pp++ = 0;    *pp++ = 0;
869      strcpy((char *)pbuffer, (char *)p);
870    
871    /* Look for options after final delimiter */    /* Look for options after final delimiter */
872    
873    options = 0;    options = 0;
874    study_options = 0;    study_options = 0;
875      log_store = showstore;  /* default from command line */
876    
877    while (*pp != 0)    while (*pp != 0)
878      {      {
879      switch (*pp++)      switch (*pp++)
880        {        {
881          case 'f': options |= PCRE_FIRSTLINE; break;
882          case 'g': do_g = 1; break;
883        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
884        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
885        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
886        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
887    
888          case '+': do_showrest = 1; break;
889        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
890          case 'C': options |= PCRE_AUTO_CALLOUT; break;
891        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
892        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
893          case 'F': do_flip = 1; break;
894          case 'G': do_G = 1; break;
895        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
896          case 'M': log_store = 1; break;
897          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
898    
899    #if !defined NOPOSIX
900        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
901    #endif
902    
903        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
904        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
905        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
906          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
907          case '?': options |= PCRE_NO_UTF8_CHECK; break;
908    
909        case 'L':        case 'L':
910        ppp = pp;        ppp = pp;
911        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
912          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
913        *ppp = 0;        *ppp = 0;
914        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
915          {          {
# Line 465  while (!done) Line 920  while (!done)
920        pp = ppp;        pp = ppp;
921        break;        break;
922    
923        case '\n': case ' ': break;        case '>':
924          to_file = pp;
925          while (*pp != 0) pp++;
926          while (isspace(pp[-1])) pp--;
927          *pp = 0;
928          break;
929    
930          case '\r':                      /* So that it works in Windows */
931          case '\n':
932          case ' ':
933          break;
934    
935        default:        default:
936        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
937        goto SKIP_DATA;        goto SKIP_DATA;
# Line 476  while (!done) Line 942  while (!done)
942    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
943    local character tables. */    local character tables. */
944    
945    #if !defined NOPOSIX
946    if (posix || do_posix)    if (posix || do_posix)
947      {      {
948      int rc;      int rc;
949      int cflags = 0;      int cflags = 0;
950    
951      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
952      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
953        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
954        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
955        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
956    
957      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
958    
959      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 489  while (!done) Line 961  while (!done)
961    
962      if (rc != 0)      if (rc != 0)
963        {        {
964        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
965        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
966        goto SKIP_DATA;        goto SKIP_DATA;
967        }        }
# Line 498  while (!done) Line 970  while (!done)
970    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
971    
972    else    else
973    #endif  /* !defined NOPOSIX */
974    
975      {      {
976      if (timeit)      if (timeit)
977        {        {
# Line 511  while (!done) Line 985  while (!done)
985          }          }
986        time_taken = clock() - start_time;        time_taken = clock() - start_time;
987        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
988          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
989          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
990        }        }
991    
992      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 528  while (!done) Line 1002  while (!done)
1002          {          {
1003          for (;;)          for (;;)
1004            {            {
1005            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1006              {              {
1007              done = 1;              done = 1;
1008              goto CONTINUE;              goto CONTINUE;
# Line 542  while (!done) Line 1016  while (!done)
1016        goto CONTINUE;        goto CONTINUE;
1017        }        }
1018    
1019      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
1020        info-returning functions. The old one has a limited interface and
1021        returns only limited data. Check that it agrees with the newer one. */
1022    
1023        if (log_store)
1024          fprintf(outfile, "Memory allocation (code space): %d\n",
1025            (int)(gotten_store -
1026                  sizeof(real_pcre) -
1027                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1028    
1029      if (do_showinfo)      /* Extract the size for possible writing before possibly flipping it,
1030        {      and remember the store that was got. */
       int first_char, count;  
1031    
1032        if (do_debug) print_internals(re, outfile);      true_size = ((real_pcre *)re)->size;
1033        regex_gotten_store = gotten_store;
       count = pcre_info(re, &options, &first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d while reading info\n", count);  
       else  
         {  
         fprintf(outfile, "Identifying subpattern count = %d\n", count);  
         if (options == 0) fprintf(outfile, "No options\n");  
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
         else  
           {  
           if (isprint(first_char))  
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
         }  
       }  
1034    
1035      /* If /S was present, study the regexp to generate additional info to      /* If /S was present, study the regexp to generate additional info to
1036      help with the matching. */      help with the matching. */
# Line 599  while (!done) Line 1047  while (!done)
1047          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1048          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1049          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
1050            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1051            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1052          }          }
   
1053        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
1054        if (error != NULL)        if (error != NULL)
1055          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
1056        else if (extra == NULL)        else if (extra != NULL)
1057          fprintf(outfile, "Study returned NULL\n");          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1058          }
1059    
1060        /* This looks at internal information. A bit kludgy to do it this      /* If the 'F' option was present, we flip the bytes of all the integer
1061        way, but it is useful for testing. */      fields in the regex data block and the study block. This is to make it
1062        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1063        compiled on a different architecture. */
1064    
1065        if (do_flip)
1066          {
1067          real_pcre *rre = (real_pcre *)re;
1068          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1069          rre->size = byteflip(rre->size, sizeof(rre->size));
1070          rre->options = byteflip(rre->options, sizeof(rre->options));
1071          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1072          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1073          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1074          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1075          rre->name_table_offset = byteflip(rre->name_table_offset,
1076            sizeof(rre->name_table_offset));
1077          rre->name_entry_size = byteflip(rre->name_entry_size,
1078            sizeof(rre->name_entry_size));
1079          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1080    
1081        else if (do_showinfo)        if (extra != NULL)
1082          {          {
1083          real_pcre_extra *xx = (real_pcre_extra *)extra;          pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1084          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1085            fprintf(outfile, "No starting character set\n");          rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1086            }
1087          }
1088    
1089        /* Extract information from the compiled data if required */
1090    
1091        SHOW_INFO:
1092    
1093        if (do_showinfo)
1094          {
1095          unsigned long int get_options, all_options;
1096    #if !defined NOINFOCHECK
1097          int old_first_char, old_options, old_count;
1098    #endif
1099          int count, backrefmax, first_char, need_char;
1100          int nameentrysize, namecount;
1101          const uschar *nametable;
1102    
1103          if (do_debug)
1104            {
1105            fprintf(outfile, "------------------------------------------------------------------\n");
1106            pcre_printint(re, outfile);
1107            }
1108    
1109          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1110          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1111          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1112          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1113          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1114          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1115          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1116          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1117          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1118    
1119    #if !defined NOINFOCHECK
1120          old_count = pcre_info(re, &old_options, &old_first_char);
1121          if (count < 0) fprintf(outfile,
1122            "Error %d from pcre_info()\n", count);
1123          else
1124            {
1125            if (old_count != count) fprintf(outfile,
1126              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1127                old_count);
1128    
1129            if (old_first_char != first_char) fprintf(outfile,
1130              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1131                first_char, old_first_char);
1132    
1133            if (old_options != (int)get_options) fprintf(outfile,
1134              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1135                get_options, old_options);
1136            }
1137    #endif
1138    
1139          if (size != regex_gotten_store) fprintf(outfile,
1140            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1141            (int)size, (int)regex_gotten_store);
1142    
1143          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1144          if (backrefmax > 0)
1145            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1146    
1147          if (namecount > 0)
1148            {
1149            fprintf(outfile, "Named capturing subpatterns:\n");
1150            while (namecount-- > 0)
1151              {
1152              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1153                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1154                GET2(nametable, 0));
1155              nametable += nameentrysize;
1156              }
1157            }
1158    
1159          /* The NOPARTIAL bit is a private bit in the options, so we have
1160          to fish it out via our back door */
1161    
1162          all_options = ((real_pcre *)re)->options;
1163          if (do_flip)
1164            {
1165            all_options = byteflip(all_options, sizeof(all_options));
1166            }
1167    
1168          if ((all_options & PCRE_NOPARTIAL) != 0)
1169            fprintf(outfile, "Partial matching not supported\n");
1170    
1171          if (get_options == 0) fprintf(outfile, "No options\n");
1172            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s\n",
1173              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1174              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1175              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1176              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1177              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1178              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1179              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1180              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1181              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1182              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1183              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1184              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1185    
1186          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1187            fprintf(outfile, "Case state changes\n");
1188    
1189          if (first_char == -1)
1190            {
1191            fprintf(outfile, "First char at start or follows \\n\n");
1192            }
1193          else if (first_char < 0)
1194            {
1195            fprintf(outfile, "No first char\n");
1196            }
1197          else
1198            {
1199            int ch = first_char & 255;
1200            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1201              "" : " (caseless)";
1202            if (isprint(ch))
1203              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1204            else
1205              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1206            }
1207    
1208          if (need_char < 0)
1209            {
1210            fprintf(outfile, "No need char\n");
1211            }
1212          else
1213            {
1214            int ch = need_char & 255;
1215            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1216              "" : " (caseless)";
1217            if (isprint(ch))
1218              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1219            else
1220              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1221            }
1222    
1223          /* Don't output study size; at present it is in any case a fixed
1224          value, but it varies, depending on the computer architecture, and
1225          so messes up the test suite. (And with the /F option, it might be
1226          flipped.) */
1227    
1228          if (do_study)
1229            {
1230            if (extra == NULL)
1231              fprintf(outfile, "Study returned NULL\n");
1232          else          else
1233            {            {
1234            int i;            uschar *start_bits = NULL;
1235            int c = 24;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1236            fprintf(outfile, "Starting character set: ");  
1237            for (i = 0; i < 256; i++)            if (start_bits == NULL)
1238                fprintf(outfile, "No starting byte set\n");
1239              else
1240              {              {
1241              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              int i;
1242                int c = 24;
1243                fprintf(outfile, "Starting byte set: ");
1244                for (i = 0; i < 256; i++)
1245                {                {
1246                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
                 {  
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
1247                  {                  {
1248                  fprintf(outfile, "\\x%02x ", i);                  if (c > 75)
1249                  c += 5;                    {
1250                      fprintf(outfile, "\n  ");
1251                      c = 2;
1252                      }
1253                    if (isprint(i) && i != ' ')
1254                      {
1255                      fprintf(outfile, "%c ", i);
1256                      c += 2;
1257                      }
1258                    else
1259                      {
1260                      fprintf(outfile, "\\x%02x ", i);
1261                      c += 5;
1262                      }
1263                  }                  }
1264                }                }
1265                fprintf(outfile, "\n");
1266              }              }
           fprintf(outfile, "\n");  
1267            }            }
1268          }          }
1269        }        }
1270      }  
1271        /* If the '>' option was present, we write out the regex to a file, and
1272        that is all. The first 8 bytes of the file are the regex length and then
1273        the study length, in big-endian order. */
1274    
1275        if (to_file != NULL)
1276          {
1277          FILE *f = fopen((char *)to_file, "wb");
1278          if (f == NULL)
1279            {
1280            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1281            }
1282          else
1283            {
1284            uschar sbuf[8];
1285            sbuf[0] = (true_size >> 24)  & 255;
1286            sbuf[1] = (true_size >> 16)  & 255;
1287            sbuf[2] = (true_size >>  8)  & 255;
1288            sbuf[3] = (true_size)  & 255;
1289    
1290            sbuf[4] = (true_study_size >> 24)  & 255;
1291            sbuf[5] = (true_study_size >> 16)  & 255;
1292            sbuf[6] = (true_study_size >>  8)  & 255;
1293            sbuf[7] = (true_study_size)  & 255;
1294    
1295            if (fwrite(sbuf, 1, 8, f) < 8 ||
1296                fwrite(re, 1, true_size, f) < true_size)
1297              {
1298              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1299              }
1300            else
1301              {
1302              fprintf(outfile, "Compiled regex written to %s\n", to_file);
1303              if (extra != NULL)
1304                {
1305                if (fwrite(extra->study_data, 1, true_study_size, f) <
1306                    true_study_size)
1307                  {
1308                  fprintf(outfile, "Write error on %s: %s\n", to_file,
1309                    strerror(errno));
1310                  }
1311                else fprintf(outfile, "Study data written to %s\n", to_file);
1312                }
1313              }
1314            fclose(f);
1315            }
1316    
1317          new_free(re);
1318          if (extra != NULL) new_free(extra);
1319          if (tables != NULL) new_free((void *)tables);
1320          continue;  /* With next regex */
1321          }
1322        }        /* End of non-POSIX compile */
1323    
1324    /* Read data lines and test them */    /* Read data lines and test them */
1325    
1326    for (;;)    for (;;)
1327      {      {
1328      unsigned char *q;      uschar *q;
1329        uschar *bptr = dbuffer;
1330        int *use_offsets = offsets;
1331        int use_size_offsets = size_offsets;
1332        int callout_data = 0;
1333        int callout_data_set = 0;
1334      int count, c;      int count, c;
1335      int copystrings = 0;      int copystrings = 0;
1336        int find_match_limit = 0;
1337      int getstrings = 0;      int getstrings = 0;
1338      int getlist = 0;      int getlist = 0;
1339      int offsets[45];      int gmatched = 0;
1340      int size_offsets = sizeof(offsets)/sizeof(int);      int start_offset = 0;
1341        int g_notempty = 0;
1342        int use_dfa = 0;
1343    
1344      options = 0;      options = 0;
1345    
1346      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
1347      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      first_callout = 1;
1348        callout_extra = 0;
1349        callout_count = 0;
1350        callout_fail_count = 999999;
1351        callout_fail_id = -1;
1352        show_malloc = 0;
1353    
1354        if (infile == stdin) printf("data> ");
1355        if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1356        {        {
1357        done = 1;        done = 1;
1358        goto CONTINUE;        goto CONTINUE;
# Line 684  while (!done) Line 1372  while (!done)
1372        {        {
1373        int i = 0;        int i = 0;
1374        int n = 0;        int n = 0;
1375    
1376        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1377          {          {
1378          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 703  while (!done) Line 1392  while (!done)
1392          break;          break;
1393    
1394          case 'x':          case 'x':
1395    
1396            /* Handle \x{..} specially - new Perl thing for utf8 */
1397    
1398    #if !defined NOUTF8
1399            if (*p == '{')
1400              {
1401              unsigned char *pt = p;
1402              c = 0;
1403              while (isxdigit(*(++pt)))
1404                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1405              if (*pt == '}')
1406                {
1407                unsigned char buff8[8];
1408                int ii, utn;
1409                utn = ord2utf8(c, buff8);
1410                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1411                c = buff8[ii];   /* Last byte */
1412                p = pt + 1;
1413                break;
1414                }
1415              /* Not correct form; fall through */
1416              }
1417    #endif
1418    
1419            /* Ordinary \x */
1420    
1421          c = 0;          c = 0;
1422          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1423            {            {
# Line 711  while (!done) Line 1426  while (!done)
1426            }            }
1427          break;          break;
1428    
1429          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1430          p--;          p--;
1431          continue;          continue;
1432    
1433            case '>':
1434            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1435            continue;
1436    
1437          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1438          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1439          continue;          continue;
# Line 724  while (!done) Line 1443  while (!done)
1443          continue;          continue;
1444    
1445          case 'C':          case 'C':
1446          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1447          copystrings |= 1 << n;            {
1448              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1449              copystrings |= 1 << n;
1450              }
1451            else if (isalnum(*p))
1452              {
1453              uschar name[256];
1454              uschar *npp = name;
1455              while (isalnum(*p)) *npp++ = *p++;
1456              *npp = 0;
1457              n = pcre_get_stringnumber(re, (char *)name);
1458              if (n < 0)
1459                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1460              else copystrings |= 1 << n;
1461              }
1462            else if (*p == '+')
1463              {
1464              callout_extra = 1;
1465              p++;
1466              }
1467            else if (*p == '-')
1468              {
1469              pcre_callout = NULL;
1470              p++;
1471              }
1472            else if (*p == '!')
1473              {
1474              callout_fail_id = 0;
1475              p++;
1476              while(isdigit(*p))
1477                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1478              callout_fail_count = 0;
1479              if (*p == '!')
1480                {
1481                p++;
1482                while(isdigit(*p))
1483                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1484                }
1485              }
1486            else if (*p == '*')
1487              {
1488              int sign = 1;
1489              callout_data = 0;
1490              if (*(++p) == '-') { sign = -1; p++; }
1491              while(isdigit(*p))
1492                callout_data = callout_data * 10 + *p++ - '0';
1493              callout_data *= sign;
1494              callout_data_set = 1;
1495              }
1496          continue;          continue;
1497    
1498    #if !defined NODFA
1499            case 'D':
1500    #if !defined NOPOSIX
1501            if (posix || do_posix)
1502              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1503            else
1504    #endif
1505              use_dfa = 1;
1506            continue;
1507    
1508            case 'F':
1509            options |= PCRE_DFA_SHORTEST;
1510            continue;
1511    #endif
1512    
1513          case 'G':          case 'G':
1514          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1515          getstrings |= 1 << n;            {
1516              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1517              getstrings |= 1 << n;
1518              }
1519            else if (isalnum(*p))
1520              {
1521              uschar name[256];
1522              uschar *npp = name;
1523              while (isalnum(*p)) *npp++ = *p++;
1524              *npp = 0;
1525              n = pcre_get_stringnumber(re, (char *)name);
1526              if (n < 0)
1527                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1528              else getstrings |= 1 << n;
1529              }
1530          continue;          continue;
1531    
1532          case 'L':          case 'L':
1533          getlist = 1;          getlist = 1;
1534          continue;          continue;
1535    
1536            case 'M':
1537            find_match_limit = 1;
1538            continue;
1539    
1540            case 'N':
1541            options |= PCRE_NOTEMPTY;
1542            continue;
1543    
1544          case 'O':          case 'O':
1545          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1546          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1547              {
1548              size_offsets_max = n;
1549              free(offsets);
1550              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1551              if (offsets == NULL)
1552                {
1553                printf("** Failed to get %d bytes of memory for offsets vector\n",
1554                  size_offsets_max * sizeof(int));
1555                yield = 1;
1556                goto EXIT;
1557                }
1558              }
1559            use_size_offsets = n;
1560            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1561            continue;
1562    
1563            case 'P':
1564            options |= PCRE_PARTIAL;
1565            continue;
1566    
1567    #if !defined NODFA
1568            case 'R':
1569            options |= PCRE_DFA_RESTART;
1570            continue;
1571    #endif
1572    
1573            case 'S':
1574            show_malloc = 1;
1575          continue;          continue;
1576    
1577          case 'Z':          case 'Z':
1578          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1579          continue;          continue;
1580    
1581            case '?':
1582            options |= PCRE_NO_UTF8_CHECK;
1583            continue;
1584          }          }
1585        *q++ = c;        *q++ = c;
1586        }        }
1587      *q = 0;      *q = 0;
1588      len = q - dbuffer;      len = q - dbuffer;
1589    
1590        if ((all_use_dfa || use_dfa) && find_match_limit)
1591          {
1592          printf("**Match limit not relevant for DFA matching: ignored\n");
1593          find_match_limit = 0;
1594          }
1595    
1596      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1597      support timing. */      support timing or playing with the match limit or callout data. */
1598    
1599    #if !defined NOPOSIX
1600      if (posix || do_posix)      if (posix || do_posix)
1601        {        {
1602        int rc;        int rc;
1603        int eflags = 0;        int eflags = 0;
1604        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1605          if (use_size_offsets > 0)
1606            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1607        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1608        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1609    
1610        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1611    
1612        if (rc != 0)        if (rc != 0)
1613          {          {
1614          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1615          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1616          }          }
1617          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1618                  != 0)
1619            {
1620            fprintf(outfile, "Matched with REG_NOSUB\n");
1621            }
1622        else        else
1623          {          {
1624          size_t i;          size_t i;
1625          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1626            {            {
1627            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1628              {              {
1629              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1630              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1631                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1632              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1633                if (i == 0 && do_showrest)
1634                  {
1635                  fprintf(outfile, " 0+ ");
1636                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1637                    outfile);
1638                  fprintf(outfile, "\n");
1639                  }
1640              }              }
1641            }            }
1642          }          }
1643          free(pmatch);
1644        }        }
1645    
1646      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1647    
1648      else      else
1649    #endif  /* !defined NOPOSIX */
1650    
1651        for (;; gmatched++)    /* Loop for /g or /G */
1652        {        {
1653        if (timeit)        if (timeit)
1654          {          {
1655          register int i;          register int i;
1656          clock_t time_taken;          clock_t time_taken;
1657          clock_t start_time = clock();          clock_t start_time = clock();
1658    
1659    #if !defined NODFA
1660            if (all_use_dfa || use_dfa)
1661              {
1662              int workspace[1000];
1663              for (i = 0; i < LOOPREPEAT; i++)
1664                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1665                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1666                  sizeof(workspace)/sizeof(int));
1667              }
1668            else
1669    #endif
1670    
1671          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1672            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1673              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1674    
1675          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1676          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1677            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1678            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1679            }
1680    
1681          /* If find_match_limit is set, we want to do repeated matches with
1682          varying limits in order to find the minimum value for the match limit and
1683          for the recursion limit. */
1684    
1685          if (find_match_limit)
1686            {
1687            if (extra == NULL)
1688              {
1689              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1690              extra->flags = 0;
1691              }
1692    
1693            count = check_match_limit(re, extra, bptr, len, start_offset,
1694              options|g_notempty, use_offsets, use_size_offsets,
1695              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1696              PCRE_ERROR_MATCHLIMIT, "match()");
1697    
1698            count = check_match_limit(re, extra, bptr, len, start_offset,
1699              options|g_notempty, use_offsets, use_size_offsets,
1700              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1701              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
1702            }
1703    
1704          /* If callout_data is set, use the interface with additional data */
1705    
1706          else if (callout_data_set)
1707            {
1708            if (extra == NULL)
1709              {
1710              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1711              extra->flags = 0;
1712              }
1713            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1714            extra->callout_data = &callout_data;
1715            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1716              options | g_notempty, use_offsets, use_size_offsets);
1717            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1718          }          }
1719    
1720        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
1721          size_offsets);        value of match_limit. */
1722    
1723        if (count == 0)  #if !defined NODFA
1724          else if (all_use_dfa || use_dfa)
1725          {          {
1726          fprintf(outfile, "Matched, but too many substrings\n");          int workspace[1000];
1727          count = size_offsets/3;          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1728              options | g_notempty, use_offsets, use_size_offsets, workspace,
1729              sizeof(workspace)/sizeof(int));
1730            if (count == 0)
1731              {
1732              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1733              count = use_size_offsets/2;
1734              }
1735          }          }
1736    #endif
1737    
1738          else
1739            {
1740            count = pcre_exec(re, extra, (char *)bptr, len,
1741              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1742            if (count == 0)
1743              {
1744              fprintf(outfile, "Matched, but too many substrings\n");
1745              count = use_size_offsets/3;
1746              }
1747            }
1748    
1749          /* Matched */
1750    
1751        if (count >= 0)        if (count >= 0)
1752          {          {
1753          int i;          int i;
1754          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1755            {            {
1756            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1757              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1758            else            else
1759              {              {
1760              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1761              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1762                  use_offsets[i+1] - use_offsets[i], outfile);
1763              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1764                if (i == 0)
1765                  {
1766                  if (do_showrest)
1767                    {
1768                    fprintf(outfile, " 0+ ");
1769                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1770                      outfile);
1771                    fprintf(outfile, "\n");
1772                    }
1773                  }
1774              }              }
1775            }            }
1776    
# Line 832  while (!done) Line 1778  while (!done)
1778            {            {
1779            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1780              {              {
1781              char buffer[16];              char copybuffer[16];
1782              int rc = pcre_copy_substring((char *)dbuffer, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1783                i, buffer, sizeof(buffer));                i, copybuffer, sizeof(copybuffer));
1784              if (rc < 0)              if (rc < 0)
1785                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1786              else              else
1787                fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1788              }              }
1789            }            }
1790    
# Line 847  while (!done) Line 1793  while (!done)
1793            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1794              {              {
1795              const char *substring;              const char *substring;
1796              int rc = pcre_get_substring((char *)dbuffer, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1797                i, &substring);                i, &substring);
1798              if (rc < 0)              if (rc < 0)
1799                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
1800              else              else
1801                {                {
1802                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1803                free((void *)substring);                /* free((void *)substring); */
1804                  pcre_free_substring(substring);
1805                }                }
1806              }              }
1807            }            }
# Line 862  while (!done) Line 1809  while (!done)
1809          if (getlist)          if (getlist)
1810            {            {
1811            const char **stringlist;            const char **stringlist;
1812            int rc = pcre_get_substring_list((char *)dbuffer, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1813              &stringlist);              &stringlist);
1814            if (rc < 0)            if (rc < 0)
1815              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 872  while (!done) Line 1819  while (!done)
1819                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1820              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1821                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1822              free((void *)stringlist);              /* free((void *)stringlist); */
1823                pcre_free_substring_list(stringlist);
1824              }              }
1825            }            }
1826            }
1827    
1828          /* There was a partial match */
1829    
1830          else if (count == PCRE_ERROR_PARTIAL)
1831            {
1832            fprintf(outfile, "Partial match");
1833    #if !defined NODFA
1834            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1835              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1836                bptr + use_offsets[0]);
1837    #endif
1838            fprintf(outfile, "\n");
1839            break;  /* Out of the /g loop */
1840          }          }
1841    
1842          /* Failed to match. If this is a /g or /G loop and we previously set
1843          g_notempty after a null match, this is not necessarily the end.
1844          We want to advance the start offset, and continue. In the case of UTF-8
1845          matching, the advance must be one character, not one byte. Fudge the
1846          offset values to achieve this. We won't be at the end of the string -
1847          that was checked before setting g_notempty. */
1848    
1849        else        else
1850          {          {
1851          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1852              {
1853              int onechar = 1;
1854              use_offsets[0] = start_offset;
1855              if (use_utf8)
1856                {
1857                while (start_offset + onechar < len)
1858                  {
1859                  int tb = bptr[start_offset+onechar];
1860                  if (tb <= 127) break;
1861                  tb &= 0xc0;
1862                  if (tb != 0 && tb != 0xc0) onechar++;
1863                  }
1864                }
1865              use_offsets[1] = start_offset + onechar;
1866              }
1867            else
1868              {
1869              if (count == PCRE_ERROR_NOMATCH)
1870                {
1871                if (gmatched == 0) fprintf(outfile, "No match\n");
1872                }
1873            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
1874              break;  /* Out of the /g loop */
1875              }
1876            }
1877    
1878          /* If not /g or /G we are done */
1879    
1880          if (!do_g && !do_G) break;
1881    
1882          /* If we have matched an empty string, first check to see if we are at
1883          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1884          what Perl's /g option does. This turns out to be rather cunning. First
1885          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1886          same point. If this fails (picked up above) we advance to the next
1887          character. */
1888    
1889          g_notempty = 0;
1890          if (use_offsets[0] == use_offsets[1])
1891            {
1892            if (use_offsets[0] == len) break;
1893            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1894          }          }
1895        }  
1896      }        /* For /g, update the start offset, leaving the rest alone */
1897    
1898          if (do_g) start_offset = use_offsets[1];
1899    
1900          /* For /G, update the pointer and length */
1901    
1902          else
1903            {
1904            bptr += use_offsets[1];
1905            len -= use_offsets[1];
1906            }
1907          }  /* End of loop for /g and /G */
1908        }    /* End of loop for data lines */
1909    
1910    CONTINUE:    CONTINUE:
1911    
1912    #if !defined NOPOSIX
1913    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1914    if (re != NULL) free(re);  #endif
1915    if (extra != NULL) free(extra);  
1916      if (re != NULL) new_free(re);
1917      if (extra != NULL) new_free(extra);
1918    if (tables != NULL)    if (tables != NULL)
1919      {      {
1920      free((void *)tables);      new_free((void *)tables);
1921      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1922      }      }
1923    }    }
1924    
1925  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1926  return 0;  
1927    EXIT:
1928    
1929    if (infile != NULL && infile != stdin) fclose(infile);
1930    if (outfile != NULL && outfile != stdout) fclose(outfile);
1931    
1932    free(buffer);
1933    free(dbuffer);
1934    free(pbuffer);
1935    free(offsets);
1936    
1937    return yield;
1938  }  }
1939    
1940  /* End */  /* End of pcretest.c */

Legend:
Removed from v.29  
changed lines
  Added in v.87

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12