/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 45 by nigel, Sat Feb 24 21:39:25 2007 UTC revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    
48    /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
69    
70    #define PCRE_SPY        /* For Win32 build, import data, not export */
71    
72    /* We include pcre_internal.h because we need the internal info for displaying
73    the results of pcre_study() and we also need to know about the internal
74    macros, structures, and other internal data values; pcretest has "inside
75    information" compared to a program that strictly follows the PCRE API. */
76    
77    #include "pcre_internal.h"
78    
79    /* We need access to the data tables that PCRE uses. So as not to have to keep
80    two copies, we include the source file here, changing the names of the external
81    symbols to prevent clashes. */
82    
83    #define _pcre_utf8_table1      utf8_table1
84    #define _pcre_utf8_table1_size utf8_table1_size
85    #define _pcre_utf8_table2      utf8_table2
86    #define _pcre_utf8_table3      utf8_table3
87    #define _pcre_utf8_table4      utf8_table4
88    #define _pcre_utt              utt
89    #define _pcre_utt_size         utt_size
90    #define _pcre_OP_lengths       OP_lengths
91    
92    #include "pcre_tables.c"
93    
94    /* We also need the pcre_printint() function for printing out compiled
95    patterns. This function is in a separate file so that it can be included in
96    pcre_compile.c when that module is compiled with debugging enabled.
97    
98    The definition of the macro PRINTABLE, which determines whether to print an
99    output character as-is or as a hex value when showing compiled patterns, is
100    contained in this file. We use it here also, in cases when the locale has not
101    been explicitly changed, so as to get consistent output from systems that
102    differ in their output from isprint() even in the "C" locale. */
103    
104  /* Use the internal info for displaying the results of pcre_study(). */  #include "pcre_printint.src"
105    
106    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107    
 #include "internal.h"  
108    
109  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
110  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 114  Makefile. */
114  #include "pcreposix.h"  #include "pcreposix.h"
115  #endif  #endif
116    
117    /* It is also possible, for the benefit of the version imported into Exim, to
118    build pcretest without support for UTF8 (define NOUTF8), without the interface
119    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
120    function (define NOINFOCHECK). */
121    
122    
123    /* Other parameters */
124    
125  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
126  #ifdef CLK_TCK  #ifdef CLK_TCK
127  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 130  Makefile. */
130  #endif  #endif
131  #endif  #endif
132    
133  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
134    
135    #define LOOPREPEAT 500000
136    
137    /* Static variables */
138    
139  static FILE *outfile;  static FILE *outfile;
140  static int log_store = 0;  static int log_store = 0;
141    static int callout_count;
142    static int callout_extra;
143    static int callout_fail_count;
144    static int callout_fail_id;
145    static int first_callout;
146    static int locale_set = 0;
147    static int show_malloc;
148    static int use_utf8;
149  static size_t gotten_store;  static size_t gotten_store;
150    
151    /* The buffers grow automatically if very long input lines are encountered. */
152    
153    static int buffer_size = 50000;
154    static uschar *buffer = NULL;
155    static uschar *dbuffer = NULL;
156    static uschar *pbuffer = NULL;
157    
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
158    
 static const char *OP_names[] = {  
   "End", "\\A", "\\B", "\\b", "\\D", "\\d",  
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
159    
160    /*************************************************
161    *        Read or extend an input line            *
162    *************************************************/
163    
164  static void print_internals(pcre *re)  /* Input lines are read into buffer, but both patterns and data lines can be
165  {  continued over multiple input lines. In addition, if the buffer fills up, we
166  unsigned char *code = ((real_pcre *)re)->code;  want to automatically expand it so as to be able to handle extremely large
167    lines that are needed for certain stress tests. When the input buffer is
168    expanded, the other two buffers must also be expanded likewise, and the
169    contents of pbuffer, which are a copy of the input for callouts, must be
170    preserved (for when expansion happens for a data line). This is not the most
171    optimal way of handling this, but hey, this is just a test program!
172    
173    Arguments:
174      f            the file to read
175      start        where in buffer to start (this *must* be within buffer)
176    
177    Returns:       pointer to the start of new data
178                   could be a copy of start, or could be moved
179                   NULL if no data read and EOF reached
180    */
181    
182  fprintf(outfile, "------------------------------------------------------------------\n");  static uschar *
183    extend_inputline(FILE *f, uschar *start)
184    {
185    uschar *here = start;
186    
187  for(;;)  for (;;)
188    {    {
189    int c;    int rlen = buffer_size - (here - buffer);
   int charlength;  
190    
191    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));    if (rlen > 1000)
192        {
193        int dlen;
194        if (fgets((char *)here, rlen,  f) == NULL)
195          return (here == start)? NULL : start;
196        dlen = (int)strlen((char *)here);
197        if (dlen > 0 && here[dlen - 1] == '\n') return start;
198        here += dlen;
199        }
200    
201    if (*code >= OP_BRA)    else
202      {      {
203      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);      int new_buffer_size = 2*buffer_size;
204      code += 2;      uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
205      }      uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
206        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
207    
208        CLASS_REF_REPEAT:      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
209          {
210          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
211          exit(1);
212          }
213    
214        switch(*code)      memcpy(new_buffer, buffer, buffer_size);
215          {      memcpy(new_pbuffer, pbuffer, buffer_size);
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
216    
217          case OP_CRRANGE:      buffer_size = new_buffer_size;
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
218    
219          default:      start = new_buffer + (start - buffer);
220          code--;      here = new_buffer + (here - buffer);
         }  
       }  
     break;  
221    
222      /* Anything else is just a one-node item */      free(buffer);
223        free(dbuffer);
224        free(pbuffer);
225    
226      default:      buffer = new_buffer;
227      fprintf(outfile, "    %s", OP_names[*code]);      dbuffer = new_dbuffer;
228      break;      pbuffer = new_pbuffer;
229      }      }
230      }
231    
232    return NULL;  /* Control never gets here */
233    }
234    
235    
236    
237    code++;  
238    fprintf(outfile, "\n");  
239    
240    
241    /*************************************************
242    *          Read number from string               *
243    *************************************************/
244    
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple. Leading white space is
skipped, then as many decimal digits as are present are consumed. A value that
is too big for an int is clamped to the largest int instead of overflowing.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (the first character that is
                  not part of the number)

Returns:        the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Largest int, worked out locally so that no extra header is needed. This
relies on unsigned int having exactly one more value bit than int. */
const int int_max = (int)(~0u >> 1);
int result = 0;

while (*str != 0 && isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result * 10 + digit fits if and only if result <= (int_max - digit)/10.
  Once clamped, the value stays clamped, but we keep consuming digits so that
  the end pointer is still left after the whole number. */

  if (result > (int_max - digit) / 10) result = int_max;
    else result = result * 10 + digit;
  }

*endptr = str;
return(result);
}
265    
266    
267    
268    
269    /*************************************************
270    *            Convert UTF-8 string to value       *
271    *************************************************/
272    
273    /* This function takes one or more bytes that represents a UTF-8 character,
274    and returns the value of the character.
275    
276    Argument:
277      utf8bytes   a pointer to the byte vector
278      vptr        a pointer to an int to receive the value
279    
280    Returns:      >  0 => the number of bytes consumed
281                  -6 to 0 => malformed UTF-8 character at offset = (-return)
282    */
283    
284    #if !defined NOUTF8
285    
286    static int
287    utf82ord(unsigned char *utf8bytes, int *vptr)
288    {
289    int c = *utf8bytes++;
290    int d = c;
291    int i, j, s;
292    
293    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
294      {
295      if ((d & 0x80) == 0) break;
296      d <<= 1;
297      }
298    
299    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
300    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
301    
302    /* i now has a value in the range 1-5 */
303    
304    s = 6*i;
305    d = (c & utf8_table3[i]) << s;
306    
307    for (j = 0; j < i; j++)
308      {
309      c = *utf8bytes++;
310      if ((c & 0xc0) != 0x80) return -(j+1);
311      s -= 6;
312      d |= (c & 0x3f) << s;
313    }    }
314    
315    /* Check that encoding was the correct unique one */
316    
317    for (j = 0; j < utf8_table1_size; j++)
318      if (d <= utf8_table1[j]) break;
319    if (j != i) return -(i+1);
320    
321    /* Valid value */
322    
323    *vptr = d;
324    return i+1;
325    }
326    
327    #endif
328    
329    
330    
331    /*************************************************
332    *       Convert character value to UTF-8         *
333    *************************************************/
334    
335    /* This function takes an integer value in the range 0 - 0x7fffffff
336    and encodes it as a UTF-8 character in 0 to 6 bytes.
337    
338    Arguments:
339      cvalue     the character value
340      utf8bytes  pointer to buffer for result - at least 6 bytes long
341    
342    Returns:     number of characters placed in the buffer
343    */
344    
345    #if !defined NOUTF8
346    
347    static int
348    ord2utf8(int cvalue, uschar *utf8bytes)
349    {
350    register int i, j;
351    for (i = 0; i < utf8_table1_size; i++)
352      if (cvalue <= utf8_table1[i]) break;
353    utf8bytes += i;
354    for (j = i; j > 0; j--)
355     {
356     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
357     cvalue >>= 6;
358     }
359    *utf8bytes = utf8_table2[i] | cvalue;
360    return i + 1;
361  }  }
362    
363    #endif
364    
365    
 /* Character string printing function. */  
366    
367  static void pchars(unsigned char *p, int length)  /*************************************************
368    *             Print character string             *
369    *************************************************/
370    
371    /* Character string printing function. Must handle UTF-8 strings in utf8
372    mode. Yields number of characters printed. If handed a NULL file, just counts
373    chars without printing. */
374    
375    static int pchars(unsigned char *p, int length, FILE *f)
376  {  {
377  int c;  int c = 0;
378    int yield = 0;
379    
380  while (length-- > 0)  while (length-- > 0)
381    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
382      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
383      if (use_utf8)
384        {
385        int rc = utf82ord(p, &c);
386    
387        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
388          {
389          length -= rc - 1;
390          p += rc;
391          if (PRINTHEX(c))
392            {
393            if (f != NULL) fprintf(f, "%c", c);
394            yield++;
395            }
396          else
397            {
398            int n = 4;
399            if (f != NULL) fprintf(f, "\\x{%02x}", c);
400            yield += (n <= 0x000000ff)? 2 :
401                     (n <= 0x00000fff)? 3 :
402                     (n <= 0x0000ffff)? 4 :
403                     (n <= 0x000fffff)? 5 : 6;
404            }
405          continue;
406          }
407        }
408    #endif
409    
410       /* Not UTF-8, or malformed UTF-8  */
411    
412      c = *p++;
413      if (PRINTHEX(c))
414        {
415        if (f != NULL) fprintf(f, "%c", c);
416        yield++;
417        }
418      else
419        {
420        if (f != NULL) fprintf(f, "\\x%02x", c);
421        yield += 4;
422        }
423      }
424    
425    return yield;
426    }
427    
428    
429    
430    /*************************************************
431    *              Callout function                  *
432    *************************************************/
433    
434    /* Called from PCRE as a result of the (?C) item. We print out where we are in
435    the match. Yield zero unless more callouts than the fail count, or the callout
436    data is not zero. */
437    
438    static int callout(pcre_callout_block *cb)
439    {
440    FILE *f = (first_callout | callout_extra)? outfile : NULL;
441    int i, pre_start, post_start, subject_length;
442    
443    if (callout_extra)
444      {
445      fprintf(f, "Callout %d: last capture = %d\n",
446        cb->callout_number, cb->capture_last);
447    
448      for (i = 0; i < cb->capture_top * 2; i += 2)
449        {
450        if (cb->offset_vector[i] < 0)
451          fprintf(f, "%2d: <unset>\n", i/2);
452        else
453          {
454          fprintf(f, "%2d: ", i/2);
455          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
456            cb->offset_vector[i+1] - cb->offset_vector[i], f);
457          fprintf(f, "\n");
458          }
459        }
460      }
461    
462    /* Re-print the subject in canonical form, the first time or if giving full
463    datails. On subsequent calls in the same match, we use pchars just to find the
464    printed lengths of the substrings. */
465    
466    if (f != NULL) fprintf(f, "--->");
467    
468    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
469    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
470      cb->current_position - cb->start_match, f);
471    
472    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
473    
474    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
475      cb->subject_length - cb->current_position, f);
476    
477    if (f != NULL) fprintf(f, "\n");
478    
479    /* Always print appropriate indicators, with callout number if not already
480    shown. For automatic callouts, show the pattern offset. */
481    
482    if (cb->callout_number == 255)
483      {
484      fprintf(outfile, "%+3d ", cb->pattern_position);
485      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
486      }
487    else
488      {
489      if (callout_extra) fprintf(outfile, "    ");
490        else fprintf(outfile, "%3d ", cb->callout_number);
491      }
492    
493    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
494    fprintf(outfile, "^");
495    
496    if (post_start > 0)
497      {
498      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
499      fprintf(outfile, "^");
500      }
501    
502    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
503      fprintf(outfile, " ");
504    
505    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
506      pbuffer + cb->pattern_position);
507    
508    fprintf(outfile, "\n");
509    first_callout = 0;
510    
511    if (cb->callout_data != NULL)
512      {
513      int callout_data = *((int *)(cb->callout_data));
514      if (callout_data != 0)
515        {
516        fprintf(outfile, "Callout data = %d\n", callout_data);
517        return callout_data;
518        }
519      }
520    
521    return (cb->callout_number != callout_fail_id)? 0 :
522           (++callout_count >= callout_fail_count)? 1 : 0;
523  }  }
524    
525    
526    /*************************************************
527    *            Local malloc functions              *
528    *************************************************/
529    
530  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
531  compiled re. */  compiled re. */
532    
533  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
534  {  {
535    void *block = malloc(size);
536  gotten_store = size;  gotten_store = size;
537  if (log_store)  if (show_malloc)
538    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
539      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
540  return malloc(size);  }
541    
542    static void new_free(void *block)
543    {
544    if (show_malloc)
545      fprintf(outfile, "free             %p\n", block);
546    free(block);
547  }  }
548    
549    
550    /* For recursion malloc/free, to test stacking calls */
551    
552    static void *stack_malloc(size_t size)
553    {
554    void *block = malloc(size);
555    if (show_malloc)
556      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
557    return block;
558    }
559    
560    static void stack_free(void *block)
561    {
562    if (show_malloc)
563      fprintf(outfile, "stack_free       %p\n", block);
564    free(block);
565    }
566    
567    
568    /*************************************************
569    *          Call pcre_fullinfo()                  *
570    *************************************************/
571    
572  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
573    
# Line 303  if ((rc = pcre_fullinfo(re, study, optio Line 580  if ((rc = pcre_fullinfo(re, study, optio
580    
581    
582    
583    /*************************************************
584    *         Byte flipping function                 *
585    *************************************************/
586    
/* Reverse the byte order of a 2-byte or a 4-byte quantity. Only the low-order
2 (or 4) bytes of the value take part; any higher bits are discarded.

Arguments:
  value     the value to be flipped
  n         2 for a 2-byte flip; any other value gives a 4-byte flip

Returns:    the value with its bytes in the opposite order
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;           /* Least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
596    
597    
598    
599    
600    /*************************************************
601    *        Check match or recursion limit          *
602    *************************************************/
603    
604    static int
605    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
606      int start_offset, int options, int *use_offsets, int use_size_offsets,
607      int flag, unsigned long int *limit, int errnumber, const char *msg)
608    {
609    int count;
610    int min = 0;
611    int mid = 64;
612    int max = -1;
613    
614    extra->flags |= flag;
615    
616    for (;;)
617      {
618      *limit = mid;
619    
620      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
621        use_offsets, use_size_offsets);
622    
623      if (count == errnumber)
624        {
625        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
626        min = mid;
627        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
628        }
629    
630      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
631                             count == PCRE_ERROR_PARTIAL)
632        {
633        if (mid == min + 1)
634          {
635          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
636          break;
637          }
638        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
639        max = mid;
640        mid = (min + mid)/2;
641        }
642      else break;    /* Some other error */
643      }
644    
645    extra->flags &= ~flag;
646    return count;
647    }
648    
649    
650    
651    /*************************************************
652    *         Check newline indicator                *
653    *************************************************/
654    
655    /* This is used both at compile and run-time to check for <xxx> escapes, where
656    xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
657    
658    Arguments:
659      p           points after the leading '<'
660      f           file for error message
661    
662    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
663    */
664    
665    static int
666    check_newline(uschar *p, FILE *f)
667    {
668    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
669    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
670    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
671    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
672    fprintf(f, "Unknown newline type at: <%s\n", p);
673    return 0;
674    }
675    
676    
677    
678    /*************************************************
679    *             Usage function                     *
680    *************************************************/
681    
/* Write a summary of the command line options to the standard output. The
lines for the DFA and POSIX options are left out when the program is built
with NODFA or NOPOSIX defined. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
707    
708    
709    
710    /*************************************************
711    *                Main Program                    *
712    *************************************************/
713    
714  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
715  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 315  int options = 0; Line 722  int options = 0;
722  int study_options = 0;  int study_options = 0;
723  int op = 1;  int op = 1;
724  int timeit = 0;  int timeit = 0;
725    int timeitm = 0;
726  int showinfo = 0;  int showinfo = 0;
727  int showstore = 0;  int showstore = 0;
728    int quiet = 0;
729    int size_offsets = 45;
730    int size_offsets_max;
731    int *offsets = NULL;
732    #if !defined NOPOSIX
733  int posix = 0;  int posix = 0;
734    #endif
735  int debug = 0;  int debug = 0;
736  int done = 0;  int done = 0;
737  unsigned char buffer[30000];  int all_use_dfa = 0;
738  unsigned char dbuffer[1024];  int yield = 0;
739    int stack_size;
740    
741  /* Static so that new_malloc can use it. */  /* These vectors store, end-to-end, a list of captured substring names. Assume
742    that 1024 is plenty long enough for the few names we'll be testing. */
743    
744    uschar copynames[1024];
745    uschar getnames[1024];
746    
747    uschar *copynamesptr;
748    uschar *getnamesptr;
749    
750    /* Get buffers from malloc() so that Electric Fence will check their misuse
751    when I am debugging. They grow automatically when very long lines are read. */
752    
753    buffer = (unsigned char *)malloc(buffer_size);
754    dbuffer = (unsigned char *)malloc(buffer_size);
755    pbuffer = (unsigned char *)malloc(buffer_size);
756    
757    /* The outfile variable is static so that new_malloc can use it. */
758    
759  outfile = stdout;  outfile = stdout;
760    
761    /* The following  _setmode() stuff is some Windows magic that tells its runtime
762    library to translate CRLF into a single LF character. At least, that's what
763    I've been told: never having used Windows I take this all on trust. Originally
764    it set 0x8000, but then I was advised that _O_BINARY was better. */
765    
766    #if defined(_WIN32) || defined(WIN32)
767    _setmode( _fileno( stdout ), _O_BINARY );
768    #endif
769    
770  /* Scan options */  /* Scan options */
771    
772  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
773    {    {
774      unsigned char *endptr;
775    
776    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
777      showstore = 1;      showstore = 1;
778    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
779      else if (strcmp(argv[op], "-b") == 0) debug = 1;
780    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
781    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
782    #if !defined NODFA
783      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
784    #endif
785      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
786          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
787            *endptr == 0))
788        {
789        op++;
790        argc--;
791        }
792      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
793        {
794        int both = argv[op][2] == 0;
795        int temp;
796        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
797                         *endptr == 0))
798          {
799          timeitm = temp;
800          op++;
801          argc--;
802          }
803        else timeitm = LOOPREPEAT;
804        if (both) timeit = timeitm;
805        }
806      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
807          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
808            *endptr == 0))
809        {
810    #if defined(_WIN32) || defined(WIN32)
811        printf("PCRE: -S not supported on this OS\n");
812        exit(1);
813    #else
814        int rc;
815        struct rlimit rlim;
816        getrlimit(RLIMIT_STACK, &rlim);
817        rlim.rlim_cur = stack_size * 1024 * 1024;
818        rc = setrlimit(RLIMIT_STACK, &rlim);
819        if (rc != 0)
820          {
821        printf("PCRE: setrlimit() failed with error %d\n", rc);
822        exit(1);
823          }
824        op++;
825        argc--;
826    #endif
827        }
828    #if !defined NOPOSIX
829    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
830    #endif
831      else if (strcmp(argv[op], "-C") == 0)
832        {
833        int rc;
834        printf("PCRE version %s\n", pcre_version());
835        printf("Compiled with\n");
836        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
837        printf("  %sUTF-8 support\n", rc? "" : "No ");
838        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
839        printf("  %sUnicode properties support\n", rc? "" : "No ");
840        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
841        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
842          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
843          (rc == -1)? "ANY" : "???");
844        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
845        printf("  Internal link size = %d\n", rc);
846        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
847        printf("  POSIX malloc threshold = %d\n", rc);
848        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
849        printf("  Default match limit = %d\n", rc);
850        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
851        printf("  Default recursion depth limit = %d\n", rc);
852        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
853        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
854        exit(0);
855        }
856      else if (strcmp(argv[op], "-help") == 0 ||
857               strcmp(argv[op], "--help") == 0)
858        {
859        usage();
860        goto EXIT;
861        }
862    else    else
863      {      {
864      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
865      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
866      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
867             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
868      }      }
869    op++;    op++;
870    argc--;    argc--;
871    }    }
872    
873    /* Get the store for the offsets vector, and remember what it was */
874    
875    size_offsets_max = size_offsets;
876    offsets = (int *)malloc(size_offsets_max * sizeof(int));
877    if (offsets == NULL)
878      {
879      printf("** Failed to get %d bytes of memory for offsets vector\n",
880        size_offsets_max * sizeof(int));
881      yield = 1;
882      goto EXIT;
883      }
884    
885  /* Sort out the input and output files */  /* Sort out the input and output files */
886    
887  if (argc > 1)  if (argc > 1)
888    {    {
889    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
890    if (infile == NULL)    if (infile == NULL)
891      {      {
892      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
893      return 1;      yield = 1;
894        goto EXIT;
895      }      }
896    }    }
897    
898  if (argc > 2)  if (argc > 2)
899    {    {
900    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
901    if (outfile == NULL)    if (outfile == NULL)
902      {      {
903      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
904      return 1;      yield = 1;
905        goto EXIT;
906      }      }
907    }    }
908    
909  /* Set alternative malloc function */  /* Set alternative malloc function */
910    
911  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
912    pcre_free = new_free;
913    pcre_stack_malloc = stack_malloc;
914    pcre_stack_free = stack_free;
915    
916  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
917    
918  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
919    
920  /* Main loop */  /* Main loop */
921    
# Line 396  while (!done) Line 931  while (!done)
931    
932    const char *error;    const char *error;
933    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
934    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
935      const unsigned char *tables = NULL;
936      unsigned long int true_size, true_study_size = 0;
937      size_t size, regex_gotten_store;
938    int do_study = 0;    int do_study = 0;
939    int do_debug = debug;    int do_debug = debug;
940    int do_G = 0;    int do_G = 0;
941    int do_g = 0;    int do_g = 0;
942    int do_showinfo = showinfo;    int do_showinfo = showinfo;
943    int do_showrest = 0;    int do_showrest = 0;
944    int erroroffset, len, delimiter;    int do_flip = 0;
945      int erroroffset, len, delimiter, poffset;
946    
947      use_utf8 = 0;
948    
949    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
950    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
951    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
952      fflush(outfile);
953    
954    p = buffer;    p = buffer;
955    while (isspace(*p)) p++;    while (isspace(*p)) p++;
956    if (*p == 0) continue;    if (*p == 0) continue;
957    
958    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
959    complete, read more. */  
960      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
961        {
962        unsigned long int magic, get_options;
963        uschar sbuf[8];
964        FILE *f;
965    
966        p++;
967        pp = p + (int)strlen((char *)p);
968        while (isspace(pp[-1])) pp--;
969        *pp = 0;
970    
971        f = fopen((char *)p, "rb");
972        if (f == NULL)
973          {
974          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
975          continue;
976          }
977    
978        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
979    
980        true_size =
981          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
982        true_study_size =
983          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
984    
985        re = (real_pcre *)new_malloc(true_size);
986        regex_gotten_store = gotten_store;
987    
988        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
989    
990        magic = ((real_pcre *)re)->magic_number;
991        if (magic != MAGIC_NUMBER)
992          {
993          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
994            {
995            do_flip = 1;
996            }
997          else
998            {
999            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1000            fclose(f);
1001            continue;
1002            }
1003          }
1004    
1005        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1006          do_flip? " (byte-inverted)" : "", p);
1007    
1008        /* Need to know if UTF-8 for printing data strings */
1009    
1010        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1011        use_utf8 = (get_options & PCRE_UTF8) != 0;
1012    
1013        /* Now see if there is any following study data */
1014    
1015        if (true_study_size != 0)
1016          {
1017          pcre_study_data *psd;
1018    
1019          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1020          extra->flags = PCRE_EXTRA_STUDY_DATA;
1021    
1022          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1023          extra->study_data = psd;
1024    
1025          if (fread(psd, 1, true_study_size, f) != true_study_size)
1026            {
1027            FAIL_READ:
1028            fprintf(outfile, "Failed to read data from %s\n", p);
1029            if (extra != NULL) new_free(extra);
1030            if (re != NULL) new_free(re);
1031            fclose(f);
1032            continue;
1033            }
1034          fprintf(outfile, "Study data loaded from %s\n", p);
1035          do_study = 1;     /* To get the data output if requested */
1036          }
1037        else fprintf(outfile, "No study data\n");
1038    
1039        fclose(f);
1040        goto SHOW_INFO;
1041        }
1042    
1043      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1044      the pattern; if it isn't complete, read more. */
1045    
1046    delimiter = *p++;    delimiter = *p++;
1047    
# Line 425  while (!done) Line 1052  while (!done)
1052      }      }
1053    
1054    pp = p;    pp = p;
1055      poffset = p - buffer;
1056    
1057    for(;;)    for(;;)
1058      {      {
# Line 435  while (!done) Line 1063  while (!done)
1063        pp++;        pp++;
1064        }        }
1065      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1066      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1067      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1068        {        {
1069        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1070        done = 1;        done = 1;
# Line 453  while (!done) Line 1073  while (!done)
1073      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1074      }      }
1075    
1076      /* The buffer may have moved while being extended; reset the start of data
1077      pointer to the correct relative point in the buffer. */
1078    
1079      p = buffer + poffset;
1080    
1081    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1082    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1083    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1084    
1085    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1086    
1087    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1088      for callouts. */
1089    
1090    *pp++ = 0;    *pp++ = 0;
1091      strcpy((char *)pbuffer, (char *)p);
1092    
1093    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1094    
# Line 473  while (!done) Line 1100  while (!done)
1100      {      {
1101      switch (*pp++)      switch (*pp++)
1102        {        {
1103          case 'f': options |= PCRE_FIRSTLINE; break;
1104        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1105        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1106        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 481  while (!done) Line 1109  while (!done)
1109    
1110        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1111        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1112          case 'B': do_debug = 1; break;
1113          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1114        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1115        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1116          case 'F': do_flip = 1; break;
1117        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1118        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1119          case 'J': options |= PCRE_DUPNAMES; break;
1120        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1121          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1122    
1123  #if !defined NOPOSIX  #if !defined NOPOSIX
1124        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 494  while (!done) Line 1127  while (!done)
1127        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1128        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1129        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1130          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1131          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1132    
1133        case 'L':        case 'L':
1134        ppp = pp;        ppp = pp;
1135        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1136          /* The '0' test is just in case this is an unterminated line. */
1137          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1138        *ppp = 0;        *ppp = 0;
1139        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1140          {          {
1141          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1142          goto SKIP_DATA;          goto SKIP_DATA;
1143          }          }
1144        tables = pcre_maketables();        locale_set = 1;
1145        pp = ppp;        tables = pcre_maketables();
1146          pp = ppp;
1147          break;
1148    
1149          case '>':
1150          to_file = pp;
1151          while (*pp != 0) pp++;
1152          while (isspace(pp[-1])) pp--;
1153          *pp = 0;
1154          break;
1155    
1156          case '<':
1157            {
1158            int x = check_newline(pp, outfile);
1159            if (x == 0) goto SKIP_DATA;
1160            options |= x;
1161            while (*pp++ != '>');
1162            }
1163          break;
1164    
1165          case '\r':                      /* So that it works in Windows */
1166          case '\n':
1167          case ' ':
1168        break;        break;
1169    
       case '\n': case ' ': break;  
1170        default:        default:
1171        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1172        goto SKIP_DATA;        goto SKIP_DATA;
# Line 524  while (!done) Line 1182  while (!done)
1182      {      {
1183      int rc;      int rc;
1184      int cflags = 0;      int cflags = 0;
1185    
1186      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1187      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1188        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1189        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1190        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1191    
1192      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1193    
1194      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 533  while (!done) Line 1196  while (!done)
1196    
1197      if (rc != 0)      if (rc != 0)
1198        {        {
1199        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1200        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1201        goto SKIP_DATA;        goto SKIP_DATA;
1202        }        }
# Line 545  while (!done) Line 1208  while (!done)
1208  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1209    
1210      {      {
1211      if (timeit)      if (timeit > 0)
1212        {        {
1213        register int i;        register int i;
1214        clock_t time_taken;        clock_t time_taken;
1215        clock_t start_time = clock();        clock_t start_time = clock();
1216        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1217          {          {
1218          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1219          if (re != NULL) free(re);          if (re != NULL) free(re);
1220          }          }
1221        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1222        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1223          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1224          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1225        }        }
1226    
1227      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 574  while (!done) Line 1237  while (!done)
1237          {          {
1238          for (;;)          for (;;)
1239            {            {
1240            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1241              {              {
1242              done = 1;              done = 1;
1243              goto CONTINUE;              goto CONTINUE;
# Line 592  while (!done) Line 1255  while (!done)
1255      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
1256      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
1257    
1258        if (log_store)
1259          fprintf(outfile, "Memory allocation (code space): %d\n",
1260            (int)(gotten_store -
1261                  sizeof(real_pcre) -
1262                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1263    
1264        /* Extract the size for possible writing before possibly flipping it,
1265        and remember the store that was got. */
1266    
1267        true_size = ((real_pcre *)re)->size;
1268        regex_gotten_store = gotten_store;
1269    
1270        /* If /S was present, study the regexp to generate additional info to
1271        help with the matching. */
1272    
1273        if (do_study)
1274          {
1275          if (timeit > 0)
1276            {
1277            register int i;
1278            clock_t time_taken;
1279            clock_t start_time = clock();
1280            for (i = 0; i < timeit; i++)
1281              extra = pcre_study(re, study_options, &error);
1282            time_taken = clock() - start_time;
1283            if (extra != NULL) free(extra);
1284            fprintf(outfile, "  Study time %.4f milliseconds\n",
1285              (((double)time_taken * 1000.0) / (double)timeit) /
1286                (double)CLOCKS_PER_SEC);
1287            }
1288          extra = pcre_study(re, study_options, &error);
1289          if (error != NULL)
1290            fprintf(outfile, "Failed to study: %s\n", error);
1291          else if (extra != NULL)
1292            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1293          }
1294    
1295        /* If the 'F' option was present, we flip the bytes of all the integer
1296        fields in the regex data block and the study block. This is to make it
1297        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1298        compiled on a different architecture. */
1299    
1300        if (do_flip)
1301          {
1302          real_pcre *rre = (real_pcre *)re;
1303          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1304          rre->size = byteflip(rre->size, sizeof(rre->size));
1305          rre->options = byteflip(rre->options, sizeof(rre->options));
1306          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1307          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1308          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1309          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1310          rre->name_table_offset = byteflip(rre->name_table_offset,
1311            sizeof(rre->name_table_offset));
1312          rre->name_entry_size = byteflip(rre->name_entry_size,
1313            sizeof(rre->name_entry_size));
1314          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1315    
1316          if (extra != NULL)
1317            {
1318            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1319            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1320            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1321            }
1322          }
1323    
1324        /* Extract information from the compiled data if required */
1325    
1326        SHOW_INFO:
1327    
1328        if (do_debug)
1329          {
1330          fprintf(outfile, "------------------------------------------------------------------\n");
1331          pcre_printint(re, outfile);
1332          }
1333    
1334      if (do_showinfo)      if (do_showinfo)
1335        {        {
1336          unsigned long int get_options, all_options;
1337    #if !defined NOINFOCHECK
1338        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1339    #endif
1340        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1341        size_t size;        int nameentrysize, namecount;
1342          const uschar *nametable;
1343    
1344        if (do_debug) print_internals(re);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
   
       new_info(re, NULL, PCRE_INFO_OPTIONS, &options);  
1345        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1346        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1347        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1348        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1349        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1350          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1351          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1352          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1353    
1354    #if !defined NOINFOCHECK
1355        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1356        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1357          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 620  while (!done) Line 1365  while (!done)
1365            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1366              first_char, old_first_char);              first_char, old_first_char);
1367    
1368          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
1369            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1370              old_options);              get_options, old_options);
1371          }          }
1372    #endif
1373    
1374        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1375          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1376          size, gotten_store);          (int)size, (int)regex_gotten_store);
1377    
1378        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1379        if (backrefmax > 0)        if (backrefmax > 0)
1380          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
       if (options == 0) fprintf(outfile, "No options\n");  
         else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
           ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
           ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
           ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
           ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
           ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
           ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
           ((options & PCRE_EXTRA) != 0)? " extra" : "",  
           ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
1381    
1382        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (namecount > 0)
1383          fprintf(outfile, "Case state changes\n");          {
1384            fprintf(outfile, "Named capturing subpatterns:\n");
1385            while (namecount-- > 0)
1386              {
1387              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1388                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1389                GET2(nametable, 0));
1390              nametable += nameentrysize;
1391              }
1392            }
1393    
1394          /* The NOPARTIAL bit is a private bit in the options, so we have
1395          to fish it out via our back door */
1396    
1397          all_options = ((real_pcre *)re)->options;
1398          if (do_flip)
1399            {
1400            all_options = byteflip(all_options, sizeof(all_options));
1401             }
1402    
1403          if ((all_options & PCRE_NOPARTIAL) != 0)
1404            fprintf(outfile, "Partial matching not supported\n");
1405    
1406          if (get_options == 0) fprintf(outfile, "No options\n");
1407            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1408              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1409              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1410              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1411              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1412              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1413              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1414              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1415              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1416              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1417              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1418              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1419              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1420              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1421    
1422          switch (get_options & PCRE_NEWLINE_BITS)
1423            {
1424            case PCRE_NEWLINE_CR:
1425            fprintf(outfile, "Forced newline sequence: CR\n");
1426            break;
1427    
1428            case PCRE_NEWLINE_LF:
1429            fprintf(outfile, "Forced newline sequence: LF\n");
1430            break;
1431    
1432            case PCRE_NEWLINE_CRLF:
1433            fprintf(outfile, "Forced newline sequence: CRLF\n");
1434            break;
1435    
1436            case PCRE_NEWLINE_ANY:
1437            fprintf(outfile, "Forced newline sequence: ANY\n");
1438            break;
1439    
1440            default:
1441            break;
1442            }
1443    
1444        if (first_char == -1)        if (first_char == -1)
1445          {          {
1446          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1447          }          }
1448        else if (first_char < 0)        else if (first_char < 0)
1449          {          {
# Line 656  while (!done) Line 1451  while (!done)
1451          }          }
1452        else        else
1453          {          {
1454          if (isprint(first_char))          int ch = first_char & 255;
1455            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1456              "" : " (caseless)";
1457            if (PRINTHEX(ch))
1458              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1459          else          else
1460            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1461          }          }
1462    
1463        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 1466  while (!done)
1466          }          }
1467        else        else
1468          {          {
1469          if (isprint(need_char))          int ch = need_char & 255;
1470            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1471              "" : " (caseless)";
1472            if (PRINTHEX(ch))
1473              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1474          else          else
1475            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1476          }          }
       }  
1477    
1478      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1479      help with the matching. */        value, but it varies, depending on the computer architecture, and
1480          so messes up the test suite. (And with the /F option, it might be
1481          flipped.) */
1482    
1483      if (do_study)        if (do_study)
       {  
       if (timeit)  
1484          {          {
1485          register int i;          if (extra == NULL)
1486          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1487          clock_t start_time = clock();          else
1488          for (i = 0; i < LOOPREPEAT; i++)            {
1489            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1490          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1491          if (extra != NULL) free(extra);  
1492          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1493            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1494            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1495                {
1496                int i;
1497                int c = 24;
1498                fprintf(outfile, "Starting byte set: ");
1499                for (i = 0; i < 256; i++)
1500                  {
1501                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1502                    {
1503                    if (c > 75)
1504                      {
1505                      fprintf(outfile, "\n  ");
1506                      c = 2;
1507                      }
1508                    if (PRINTHEX(i) && i != ' ')
1509                      {
1510                      fprintf(outfile, "%c ", i);
1511                      c += 2;
1512                      }
1513                    else
1514                      {
1515                      fprintf(outfile, "\\x%02x ", i);
1516                      c += 5;
1517                      }
1518                    }
1519                  }
1520                fprintf(outfile, "\n");
1521                }
1522              }
1523          }          }
1524          }
1525    
1526        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1527        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1528          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1529    
1530        else if (do_showinfo)      if (to_file != NULL)
1531          {
1532          FILE *f = fopen((char *)to_file, "wb");
1533          if (f == NULL)
1534            {
1535            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1536            }
1537          else
1538          {          {
1539          uschar *start_bits = NULL;          uschar sbuf[8];
1540          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          sbuf[0] = (true_size >> 24)  & 255;
1541          if (start_bits == NULL)          sbuf[1] = (true_size >> 16)  & 255;
1542            fprintf(outfile, "No starting character set\n");          sbuf[2] = (true_size >>  8)  & 255;
1543            sbuf[3] = (true_size)  & 255;
1544    
1545            sbuf[4] = (true_study_size >> 24)  & 255;
1546            sbuf[5] = (true_study_size >> 16)  & 255;
1547            sbuf[6] = (true_study_size >>  8)  & 255;
1548            sbuf[7] = (true_study_size)  & 255;
1549    
1550            if (fwrite(sbuf, 1, 8, f) < 8 ||
1551                fwrite(re, 1, true_size, f) < true_size)
1552              {
1553              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1554              }
1555          else          else
1556            {            {
1557            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1558            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1559              {              {
1560              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1561                    true_study_size)
1562                {                {
1563                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1564                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1565                }                }
1566                else fprintf(outfile, "Study data written to %s\n", to_file);
1567    
1568              }              }
           fprintf(outfile, "\n");  
1569            }            }
1570            fclose(f);
1571          }          }
1572    
1573          new_free(re);
1574          if (extra != NULL) new_free(extra);
1575          if (tables != NULL) new_free((void *)tables);
1576          continue;  /* With next regex */
1577        }        }
1578      }      }        /* End of non-POSIX compile */
1579    
1580    /* Read data lines and test them */    /* Read data lines and test them */
1581    
1582    for (;;)    for (;;)
1583      {      {
1584      unsigned char *q;      uschar *q;
1585      unsigned char *bptr = dbuffer;      uschar *bptr = dbuffer;
1586        int *use_offsets = offsets;
1587        int use_size_offsets = size_offsets;
1588        int callout_data = 0;
1589        int callout_data_set = 0;
1590      int count, c;      int count, c;
1591      int copystrings = 0;      int copystrings = 0;
1592        int find_match_limit = 0;
1593      int getstrings = 0;      int getstrings = 0;
1594      int getlist = 0;      int getlist = 0;
1595      int gmatched = 0;      int gmatched = 0;
1596      int start_offset = 0;      int start_offset = 0;
1597      int g_notempty = 0;      int g_notempty = 0;
1598      int offsets[45];      int use_dfa = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
1599    
1600      options = 0;      options = 0;
1601    
1602      if (infile == stdin) printf("data> ");      *copynames = 0;
1603      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1604    
1605        copynamesptr = copynames;
1606        getnamesptr = getnames;
1607    
1608        pcre_callout = callout;
1609        first_callout = 1;
1610        callout_extra = 0;
1611        callout_count = 0;
1612        callout_fail_count = 999999;
1613        callout_fail_id = -1;
1614        show_malloc = 0;
1615    
1616        if (extra != NULL) extra->flags &=
1617          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1618    
1619        len = 0;
1620        for (;;)
1621        {        {
1622        done = 1;        if (infile == stdin) printf("data> ");
1623        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1624            {
1625            if (len > 0) break;
1626            done = 1;
1627            goto CONTINUE;
1628            }
1629          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1630          len = (int)strlen((char *)buffer);
1631          if (buffer[len-1] == '\n') break;
1632        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1633    
     len = (int)strlen((char *)buffer);  
1634      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1635      buffer[len] = 0;      buffer[len] = 0;
1636      if (len == 0) break;      if (len == 0) break;
# Line 777  while (!done) Line 1643  while (!done)
1643        {        {
1644        int i = 0;        int i = 0;
1645        int n = 0;        int n = 0;
1646    
1647        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1648          {          {
1649          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 793  while (!done) Line 1660  while (!done)
1660          c -= '0';          c -= '0';
1661          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1662            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1663    
1664    #if !defined NOUTF8
1665            if (use_utf8 && c > 255)
1666              {
1667              unsigned char buff8[8];
1668              int ii, utn;
1669              utn = ord2utf8(c, buff8);
1670              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1671              c = buff8[ii];   /* Last byte */
1672              }
1673    #endif
1674          break;          break;
1675    
1676          case 'x':          case 'x':
1677    
1678            /* Handle \x{..} specially - new Perl thing for utf8 */
1679    
1680    #if !defined NOUTF8
1681            if (*p == '{')
1682              {
1683              unsigned char *pt = p;
1684              c = 0;
1685              while (isxdigit(*(++pt)))
1686                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1687              if (*pt == '}')
1688                {
1689                unsigned char buff8[8];
1690                int ii, utn;
1691                utn = ord2utf8(c, buff8);
1692                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1693                c = buff8[ii];   /* Last byte */
1694                p = pt + 1;
1695                break;
1696                }
1697              /* Not correct form; fall through */
1698              }
1699    #endif
1700    
1701            /* Ordinary \x */
1702    
1703          c = 0;          c = 0;
1704          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1705            {            {
# Line 804  while (!done) Line 1708  while (!done)
1708            }            }
1709          break;          break;
1710    
1711          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1712          p--;          p--;
1713          continue;          continue;
1714    
1715            case '>':
1716            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1717            continue;
1718    
1719          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1720          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1721          continue;          continue;
# Line 817  while (!done) Line 1725  while (!done)
1725          continue;          continue;
1726    
1727          case 'C':          case 'C':
1728          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1729          copystrings |= 1 << n;            {
1730              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1731              copystrings |= 1 << n;
1732              }
1733            else if (isalnum(*p))
1734              {
1735              uschar *npp = copynamesptr;
1736              while (isalnum(*p)) *npp++ = *p++;
1737              *npp++ = 0;
1738              *npp = 0;
1739              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1740              if (n < 0)
1741                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1742              copynamesptr = npp;
1743              }
1744            else if (*p == '+')
1745              {
1746              callout_extra = 1;
1747              p++;
1748              }
1749            else if (*p == '-')
1750              {
1751              pcre_callout = NULL;
1752              p++;
1753              }
1754            else if (*p == '!')
1755              {
1756              callout_fail_id = 0;
1757              p++;
1758              while(isdigit(*p))
1759                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1760              callout_fail_count = 0;
1761              if (*p == '!')
1762                {
1763                p++;
1764                while(isdigit(*p))
1765                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1766                }
1767              }
1768            else if (*p == '*')
1769              {
1770              int sign = 1;
1771              callout_data = 0;
1772              if (*(++p) == '-') { sign = -1; p++; }
1773              while(isdigit(*p))
1774                callout_data = callout_data * 10 + *p++ - '0';
1775              callout_data *= sign;
1776              callout_data_set = 1;
1777              }
1778            continue;
1779    
1780    #if !defined NODFA
1781            case 'D':
1782    #if !defined NOPOSIX
1783            if (posix || do_posix)
1784              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1785            else
1786    #endif
1787              use_dfa = 1;
1788            continue;
1789    
1790            case 'F':
1791            options |= PCRE_DFA_SHORTEST;
1792          continue;          continue;
1793    #endif
1794    
1795          case 'G':          case 'G':
1796          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1797          getstrings |= 1 << n;            {
1798              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1799              getstrings |= 1 << n;
1800              }
1801            else if (isalnum(*p))
1802              {
1803              uschar *npp = getnamesptr;
1804              while (isalnum(*p)) *npp++ = *p++;
1805              *npp++ = 0;
1806              *npp = 0;
1807              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1808              if (n < 0)
1809                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1810              getnamesptr = npp;
1811              }
1812          continue;          continue;
1813    
1814          case 'L':          case 'L':
1815          getlist = 1;          getlist = 1;
1816          continue;          continue;
1817    
1818            case 'M':
1819            find_match_limit = 1;
1820            continue;
1821    
1822          case 'N':          case 'N':
1823          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1824          continue;          continue;
1825    
1826          case 'O':          case 'O':
1827          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1828          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1829              {
1830              size_offsets_max = n;
1831              free(offsets);
1832              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1833              if (offsets == NULL)
1834                {
1835                printf("** Failed to get %d bytes of memory for offsets vector\n",
1836                  size_offsets_max * sizeof(int));
1837                yield = 1;
1838                goto EXIT;
1839                }
1840              }
1841            use_size_offsets = n;
1842            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1843            continue;
1844    
1845            case 'P':
1846            options |= PCRE_PARTIAL;
1847            continue;
1848    
1849            case 'Q':
1850            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1851            if (extra == NULL)
1852              {
1853              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1854              extra->flags = 0;
1855              }
1856            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1857            extra->match_limit_recursion = n;
1858            continue;
1859    
1860            case 'q':
1861            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1862            if (extra == NULL)
1863              {
1864              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1865              extra->flags = 0;
1866              }
1867            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1868            extra->match_limit = n;
1869            continue;
1870    
1871    #if !defined NODFA
1872            case 'R':
1873            options |= PCRE_DFA_RESTART;
1874            continue;
1875    #endif
1876    
1877            case 'S':
1878            show_malloc = 1;
1879          continue;          continue;
1880    
1881          case 'Z':          case 'Z':
1882          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1883          continue;          continue;
1884    
1885            case '?':
1886            options |= PCRE_NO_UTF8_CHECK;
1887            continue;
1888    
1889            case '<':
1890              {
1891              int x = check_newline(p, outfile);
1892              if (x == 0) goto NEXT_DATA;
1893              options |= x;
1894              while (*p++ != '>');
1895              }
1896            continue;
1897          }          }
1898        *q++ = c;        *q++ = c;
1899        }        }
1900      *q = 0;      *q = 0;
1901      len = q - dbuffer;      len = q - dbuffer;
1902    
1903        if ((all_use_dfa || use_dfa) && find_match_limit)
1904          {
1905          printf("**Match limit not relevant for DFA matching: ignored\n");
1906          find_match_limit = 0;
1907          }
1908    
1909      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1910      support timing. */      support timing or playing with the match limit or callout data. */
1911    
1912  #if !defined NOPOSIX  #if !defined NOPOSIX
1913      if (posix || do_posix)      if (posix || do_posix)
1914        {        {
1915        int rc;        int rc;
1916        int eflags = 0;        int eflags = 0;
1917        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
1918          if (use_size_offsets > 0)
1919            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1920        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1921        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1922    
1923        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1924    
1925        if (rc != 0)        if (rc != 0)
1926          {          {
1927          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1928          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1929          }          }
1930          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1931                  != 0)
1932            {
1933            fprintf(outfile, "Matched with REG_NOSUB\n");
1934            }
1935        else        else
1936          {          {
1937          size_t i;          size_t i;
1938          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1939            {            {
1940            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1941              {              {
1942              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1943              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1944                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1945              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1946              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1947                {                {
1948                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1949                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1950                    outfile);
1951                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1952                }                }
1953              }              }
1954            }            }
1955          }          }
1956          free(pmatch);
1957        }        }
1958    
1959      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 896  while (!done) Line 1963  while (!done)
1963    
1964      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1965        {        {
1966        if (timeit)        if (timeitm > 0)
1967          {          {
1968          register int i;          register int i;
1969          clock_t time_taken;          clock_t time_taken;
1970          clock_t start_time = clock();          clock_t start_time = clock();
1971          for (i = 0; i < LOOPREPEAT; i++)  
1972    #if !defined NODFA
1973            if (all_use_dfa || use_dfa)
1974              {
1975              int workspace[1000];
1976              for (i = 0; i < timeitm; i++)
1977                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1978                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1979                  sizeof(workspace)/sizeof(int));
1980              }
1981            else
1982    #endif
1983    
1984            for (i = 0; i < timeitm; i++)
1985            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1986              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1987    
1988          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1989          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
1990            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
1991            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1992            }
1993    
1994          /* If find_match_limit is set, we want to do repeated matches with
1995          varying limits in order to find the minimum value for the match limit and
1996          for the recursion limit. */
1997    
1998          if (find_match_limit)
1999            {
2000            if (extra == NULL)
2001              {
2002              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2003              extra->flags = 0;
2004              }
2005    
2006            (void)check_match_limit(re, extra, bptr, len, start_offset,
2007              options|g_notempty, use_offsets, use_size_offsets,
2008              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2009              PCRE_ERROR_MATCHLIMIT, "match()");
2010    
2011            count = check_match_limit(re, extra, bptr, len, start_offset,
2012              options|g_notempty, use_offsets, use_size_offsets,
2013              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2014              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2015          }          }
2016    
2017        count = pcre_exec(re, extra, (char *)bptr, len,        /* If callout_data is set, use the interface with additional data */
2018          start_offset, options | g_notempty, offsets, size_offsets);  
2019          else if (callout_data_set)
2020            {
2021            if (extra == NULL)
2022              {
2023              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024              extra->flags = 0;
2025              }
2026            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2027            extra->callout_data = &callout_data;
2028            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2029              options | g_notempty, use_offsets, use_size_offsets);
2030            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2031            }
2032    
2033          /* The normal case is just to do the match once, with the default
2034          value of match_limit. */
2035    
2036    #if !defined NODFA
2037          else if (all_use_dfa || use_dfa)
2038            {
2039            int workspace[1000];
2040            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2041              options | g_notempty, use_offsets, use_size_offsets, workspace,
2042              sizeof(workspace)/sizeof(int));
2043            if (count == 0)
2044              {
2045              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2046              count = use_size_offsets/2;
2047              }
2048            }
2049    #endif
2050    
2051        if (count == 0)        else
2052          {          {
2053          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2054          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2055            if (count == 0)
2056              {
2057              fprintf(outfile, "Matched, but too many substrings\n");
2058              count = use_size_offsets/3;
2059              }
2060          }          }
2061    
2062        /* Matched */        /* Matched */
2063    
2064        if (count >= 0)        if (count >= 0)
2065          {          {
2066          int i;          int i, maxcount;
2067    
2068    #if !defined NODFA
2069            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2070    #endif
2071              maxcount = use_size_offsets/3;
2072    
2073            /* This is a check against a lunatic return value. */
2074    
2075            if (count > maxcount)
2076              {
2077              fprintf(outfile,
2078                "** PCRE error: returned count %d is too big for offset size %d\n",
2079                count, use_size_offsets);
2080              count = use_size_offsets/3;
2081              if (do_g || do_G)
2082                {
2083                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2084                do_g = do_G = FALSE;        /* Break g/G loop */
2085                }
2086              }
2087    
2088          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2089            {            {
2090            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2091              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2092            else            else
2093              {              {
2094              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2095              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2096                  use_offsets[i+1] - use_offsets[i], outfile);
2097              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2098              if (i == 0)              if (i == 0)
2099                {                {
2100                if (do_showrest)                if (do_showrest)
2101                  {                  {
2102                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
2103                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2104                      outfile);
2105                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
2106                  }                  }
2107                }                }
# Line 949  while (!done) Line 2112  while (!done)
2112            {            {
2113            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2114              {              {
2115              char copybuffer[16];              char copybuffer[256];
2116              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2117                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2118              if (rc < 0)              if (rc < 0)
2119                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 959  while (!done) Line 2122  while (!done)
2122              }              }
2123            }            }
2124    
2125            for (copynamesptr = copynames;
2126                 *copynamesptr != 0;
2127                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2128              {
2129              char copybuffer[256];
2130              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2131                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2132              if (rc < 0)
2133                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2134              else
2135                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2136              }
2137    
2138          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2139            {            {
2140            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2141              {              {
2142              const char *substring;              const char *substring;
2143              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2144                i, &substring);                i, &substring);
2145              if (rc < 0)              if (rc < 0)
2146                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2147              else              else
2148                {                {
2149                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2150                free((void *)substring);                pcre_free_substring(substring);
2151                }                }
2152              }              }
2153            }            }
2154    
2155            for (getnamesptr = getnames;
2156                 *getnamesptr != 0;
2157                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2158              {
2159              const char *substring;
2160              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2161                count, (char *)getnamesptr, &substring);
2162              if (rc < 0)
2163                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2164              else
2165                {
2166                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2167                pcre_free_substring(substring);
2168                }
2169              }
2170    
2171          if (getlist)          if (getlist)
2172            {            {
2173            const char **stringlist;            const char **stringlist;
2174            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2175              &stringlist);              &stringlist);
2176            if (rc < 0)            if (rc < 0)
2177              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 989  while (!done) Line 2181  while (!done)
2181                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2182              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2183                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
2184              free((void *)stringlist);              /* free((void *)stringlist); */
2185                pcre_free_substring_list(stringlist);
2186              }              }
2187            }            }
2188          }          }
2189    
2190          /* There was a partial match */
2191    
2192          else if (count == PCRE_ERROR_PARTIAL)
2193            {
2194            fprintf(outfile, "Partial match");
2195    #if !defined NODFA
2196            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2197              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2198                bptr + use_offsets[0]);
2199    #endif
2200            fprintf(outfile, "\n");
2201            break;  /* Out of the /g loop */
2202            }
2203    
2204        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2205        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
2206        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
2207        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
2208        was checked before setting PCRE_NOTEMPTY. */        offset values to achieve this. We won't be at the end of the string -
2209          that was checked before setting g_notempty. */
2210    
2211        else        else
2212          {          {
2213          if (g_notempty != 0)          if (g_notempty != 0)
2214            {            {
2215            offsets[0] = start_offset;            int onechar = 1;
2216            offsets[1] = start_offset + 1;            use_offsets[0] = start_offset;
2217              if (use_utf8)
2218                {
2219                while (start_offset + onechar < len)
2220                  {
2221                  int tb = bptr[start_offset+onechar];
2222                  if (tb <= 127) break;
2223                  tb &= 0xc0;
2224                  if (tb != 0 && tb != 0xc0) onechar++;
2225                  }
2226                }
2227              use_offsets[1] = start_offset + onechar;
2228            }            }
2229          else          else
2230            {            {
2231            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2232              {              {
2233              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2234              }              }
2235              else fprintf(outfile, "Error %d\n", count);
2236            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2237            }            }
2238          }          }
# Line 1025  while (!done) Line 2244  while (!done)
2244        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2245        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic
2246        what Perl's /g option does. This turns out to be rather cunning. First        what Perl's /g option does. This turns out to be rather cunning. First
2247        we set PCRE_NOTEMPTY and try the match again at the same point. If this        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2248        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
2249          character. */
2250    
2251        g_notempty = 0;        g_notempty = 0;
2252        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
2253          {          {
2254          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
2255          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2256          }          }
2257    
2258        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
2259    
2260        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
2261    
2262        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
2263    
2264        else        else
2265          {          {
2266          bptr += offsets[1];          bptr += use_offsets[1];
2267          len -= offsets[1];          len -= use_offsets[1];
2268          }          }
2269        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2270    
2271        NEXT_DATA: continue;
2272      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2273    
2274    CONTINUE:    CONTINUE:
# Line 1055  while (!done) Line 2277  while (!done)
2277    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2278  #endif  #endif
2279    
2280    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2281    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2282    if (tables != NULL)    if (tables != NULL)
2283      {      {
2284      free((void *)tables);      new_free((void *)tables);
2285      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2286        locale_set = 0;
2287      }      }
2288    }    }
2289    
2290  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2291  return 0;  
2292    EXIT:
2293    
2294    if (infile != NULL && infile != stdin) fclose(infile);
2295    if (outfile != NULL && outfile != stdout) fclose(outfile);
2296    
2297    free(buffer);
2298    free(dbuffer);
2299    free(pbuffer);
2300    free(offsets);
2301    
2302    return yield;
2303  }  }
2304    
2305  /* End */  /* End of pcretest.c */

Legend:
Removed from v.45  
changed lines
  Added in v.93

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12