/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 35 by nigel, Sat Feb 24 21:39:05 2007 UTC revision 123 by ph10, Mon Mar 12 15:19:06 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    
48    /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
69    
70    #define PCRE_SPY        /* For Win32 build, import data, not export */
71    
72    /* We include pcre_internal.h because we need the internal info for displaying
73    the results of pcre_study() and we also need to know about the internal
74    macros, structures, and other internal data values; pcretest has "inside
75    information" compared to a program that strictly follows the PCRE API. */
76    
77    #include "pcre_internal.h"
78    
79    /* We need access to the data tables that PCRE uses. So as not to have to keep
80    two copies, we include the source file here, changing the names of the external
81    symbols to prevent clashes. */
82    
83  /* Use the internal info for displaying the results of pcre_study(). */  #define _pcre_utf8_table1      utf8_table1
84    #define _pcre_utf8_table1_size utf8_table1_size
85    #define _pcre_utf8_table2      utf8_table2
86    #define _pcre_utf8_table3      utf8_table3
87    #define _pcre_utf8_table4      utf8_table4
88    #define _pcre_utt              utt
89    #define _pcre_utt_size         utt_size
90    #define _pcre_OP_lengths       OP_lengths
91    
92  #include "internal.h"  #include "pcre_tables.c"
93    
94    /* We also need the pcre_printint() function for printing out compiled
95    patterns. This function is in a separate file so that it can be included in
96    pcre_compile.c when that module is compiled with debugging enabled.
97    
98    The definition of the macro PRINTABLE, which determines whether to print an
99    output character as-is or as a hex value when showing compiled patterns, is
100    contained in this file. We use it here also, in cases when the locale has not
101    been explicitly changed, so as to get consistent output from systems that
102    differ in their output from isprint() even in the "C" locale. */
103    
104    #include "pcre_printint.src"
105    
106    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107    
108    
109    /* It is possible to compile this test program without including support for
110    testing the POSIX interface, though this is not available via the standard
111    Makefile. */
112    
113    #if !defined NOPOSIX
114  #include "pcreposix.h"  #include "pcreposix.h"
115    #endif
116    
117    /* It is also possible, for the benefit of the version currently imported into
118    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
119    interface to the DFA matcher (NODFA), and without the doublecheck of the old
120    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
121    UTF8 support if PCRE is built without it. */
122    
123    #ifndef SUPPORT_UTF8
124    #ifndef NOUTF8
125    #define NOUTF8
126    #endif
127    #endif
128    
129    
130    /* Other parameters */
131    
132  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
133  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 137 
137  #endif  #endif
138  #endif  #endif
139    
140  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
141    
142    #define LOOPREPEAT 500000
143    
144    /* Static variables */
145    
146  static FILE *outfile;  static FILE *outfile;
147  static int log_store = 0;  static int log_store = 0;
148    static int callout_count;
149    static int callout_extra;
150    static int callout_fail_count;
151    static int callout_fail_id;
152    static int first_callout;
153    static int locale_set = 0;
154    static int show_malloc;
155    static int use_utf8;
156    static size_t gotten_store;
157    
158    /* The buffers grow automatically if very long input lines are encountered. */
159    
160    static int buffer_size = 50000;
161    static uschar *buffer = NULL;
162    static uschar *dbuffer = NULL;
163    static uschar *pbuffer = NULL;
164    
165    
166    
167  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
168  code as contained in pcre.c under the DEBUG macro. */  *        Read or extend an input line            *
169    *************************************************/
170    
171  static const char *OP_names[] = {  /* Input lines are read into buffer, but both patterns and data lines can be
172    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  continued over multiple input lines. In addition, if the buffer fills up, we
173    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  want to automatically expand it so as to be able to handle extremely large
174    "Opt", "^", "$", "Any", "chars", "not",  lines that are needed for certain stress tests. When the input buffer is
175    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  expanded, the other two buffers must also be expanded likewise, and the
176    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  contents of pbuffer, which are a copy of the input for callouts, must be
177    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  preserved (for when expansion happens for a data line). This is not the most
178    "*", "*?", "+", "+?", "?", "??", "{", "{",  optimal way of handling this, but hey, this is just a test program!
179    "class", "Ref",  
180    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  Arguments:
181    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    f            the file to read
182    "Brazero", "Braminzero", "Bra"    start        where in buffer to start (this *must* be within buffer)
183  };  
184    Returns:       pointer to the start of new data
185                   could be a copy of start, or could be moved
186  static void print_internals(pcre *re, FILE *outfile)                 NULL if no data read and EOF reached
187  {  */
 unsigned char *code = ((real_pcre *)re)->code;  
   
 fprintf(outfile, "------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
188    
189          case OP_CRRANGE:  static uschar *
190          case OP_CRMINRANGE:  extend_inputline(FILE *f, uschar *start)
191          min = (code[1] << 8) + code[2];  {
192          max = (code[3] << 8) + code[4];  uschar *here = start;
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
193    
194          default:  for (;;)
195          code--;    {
196          }    int rlen = buffer_size - (here - buffer);
197    
198      if (rlen > 1000)
199        {
200        int dlen;
201        if (fgets((char *)here, rlen,  f) == NULL)
202          return (here == start)? NULL : start;
203        dlen = (int)strlen((char *)here);
204        if (dlen > 0 && here[dlen - 1] == '\n') return start;
205        here += dlen;
206        }
207    
208      else
209        {
210        int new_buffer_size = 2*buffer_size;
211        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
212        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
213        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
214    
215        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
216          {
217          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
218          exit(1);
219        }        }
     break;  
220    
221      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
222        memcpy(new_pbuffer, pbuffer, buffer_size);
223    
224        buffer_size = new_buffer_size;
225    
226      default:      start = new_buffer + (start - buffer);
227      fprintf(outfile, "    %s", OP_names[*code]);      here = new_buffer + (here - buffer);
228      break;  
229        free(buffer);
230        free(dbuffer);
231        free(pbuffer);
232    
233        buffer = new_buffer;
234        dbuffer = new_dbuffer;
235        pbuffer = new_pbuffer;
236      }      }
237      }
238    
239    return NULL;  /* Control never gets here */
240    }
241    
242    
243    code++;  
244    fprintf(outfile, "\n");  
245    
246    
247    
248    /*************************************************
249    *          Read number from string               *
250    *************************************************/
251    
252    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
253    around with conditional compilation, just do the job by hand. It is only used
254    for unpicking arguments, so just keep it simple.
255    
256    Arguments:
257      str           string to be converted
258      endptr        where to put the end pointer
259    
260    Returns:        the value as an int
261    */
262    
263    static int
264    get_value(unsigned char *str, unsigned char **endptr)
265    {
266    int result = 0;
267    while(*str != 0 && isspace(*str)) str++;
268    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
269    *endptr = str;
270    return(result);
271    }
272    
273    
274    
275    
276    /*************************************************
277    *            Convert UTF-8 string to value       *
278    *************************************************/
279    
280    /* This function takes one or more bytes that represents a UTF-8 character,
281    and returns the value of the character.
282    
283    Argument:
284      utf8bytes   a pointer to the byte vector
285      vptr        a pointer to an int to receive the value
286    
287    Returns:      >  0 => the number of bytes consumed
288                  -6 to 0 => malformed UTF-8 character at offset = (-return)
289    */
290    
291    #if !defined NOUTF8
292    
293    static int
294    utf82ord(unsigned char *utf8bytes, int *vptr)
295    {
296    int c = *utf8bytes++;
297    int d = c;
298    int i, j, s;
299    
300    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
301      {
302      if ((d & 0x80) == 0) break;
303      d <<= 1;
304    }    }
305    
306    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
307    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
308    
309    /* i now has a value in the range 1-5 */
310    
311    s = 6*i;
312    d = (c & utf8_table3[i]) << s;
313    
314    for (j = 0; j < i; j++)
315      {
316      c = *utf8bytes++;
317      if ((c & 0xc0) != 0x80) return -(j+1);
318      s -= 6;
319      d |= (c & 0x3f) << s;
320      }
321    
322    /* Check that encoding was the correct unique one */
323    
324    for (j = 0; j < utf8_table1_size; j++)
325      if (d <= utf8_table1[j]) break;
326    if (j != i) return -(i+1);
327    
328    /* Valid value */
329    
330    *vptr = d;
331    return i+1;
332  }  }
333    
334    #endif
335    
336    
337    
338  /* Character string printing function. */  /*************************************************
339    *       Convert character value to UTF-8         *
340    *************************************************/
341    
342    /* This function takes an integer value in the range 0 - 0x7fffffff
343    and encodes it as a UTF-8 character in 1 to 6 bytes.
344    
345    Arguments:
346      cvalue     the character value
347      utf8bytes  pointer to buffer for result - at least 6 bytes long
348    
349  static void pchars(unsigned char *p, int length)  Returns:     number of characters placed in the buffer
350    */
351    
352    #if !defined NOUTF8
353    
354    static int
355    ord2utf8(int cvalue, uschar *utf8bytes)
356  {  {
357  int c;  register int i, j;
358    for (i = 0; i < utf8_table1_size; i++)
359      if (cvalue <= utf8_table1[i]) break;
360    utf8bytes += i;
361    for (j = i; j > 0; j--)
362     {
363     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
364     cvalue >>= 6;
365     }
366    *utf8bytes = utf8_table2[i] | cvalue;
367    return i + 1;
368    }
369    
370    #endif
371    
372    
373    
374    /*************************************************
375    *             Print character string             *
376    *************************************************/
377    
378    /* Character string printing function. Must handle UTF-8 strings in utf8
379    mode. Yields number of characters printed. If handed a NULL file, just counts
380    chars without printing. */
381    
382    static int pchars(unsigned char *p, int length, FILE *f)
383    {
384    int c = 0;
385    int yield = 0;
386    
387  while (length-- > 0)  while (length-- > 0)
388    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
389      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
390      if (use_utf8)
391        {
392        int rc = utf82ord(p, &c);
393    
394        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
395          {
396          length -= rc - 1;
397          p += rc;
398          if (PRINTHEX(c))
399            {
400            if (f != NULL) fprintf(f, "%c", c);
401            yield++;
402            }
403          else
404            {
405            int n = 4;
406            if (f != NULL) fprintf(f, "\\x{%02x}", c);
407            yield += (n <= 0x000000ff)? 2 :
408                     (n <= 0x00000fff)? 3 :
409                     (n <= 0x0000ffff)? 4 :
410                     (n <= 0x000fffff)? 5 : 6;
411            }
412          continue;
413          }
414        }
415    #endif
416    
417       /* Not UTF-8, or malformed UTF-8  */
418    
419      c = *p++;
420      if (PRINTHEX(c))
421        {
422        if (f != NULL) fprintf(f, "%c", c);
423        yield++;
424        }
425      else
426        {
427        if (f != NULL) fprintf(f, "\\x%02x", c);
428        yield += 4;
429        }
430      }
431    
432    return yield;
433  }  }
434    
435    
436    
437    /*************************************************
438    *              Callout function                  *
439    *************************************************/
440    
441    /* Called from PCRE as a result of the (?C) item. We print out where we are in
442    the match. Yield zero unless more callouts than the fail count, or the callout
443    data is not zero. */
444    
445    static int callout(pcre_callout_block *cb)
446    {
447    FILE *f = (first_callout | callout_extra)? outfile : NULL;
448    int i, pre_start, post_start, subject_length;
449    
450    if (callout_extra)
451      {
452      fprintf(f, "Callout %d: last capture = %d\n",
453        cb->callout_number, cb->capture_last);
454    
455      for (i = 0; i < cb->capture_top * 2; i += 2)
456        {
457        if (cb->offset_vector[i] < 0)
458          fprintf(f, "%2d: <unset>\n", i/2);
459        else
460          {
461          fprintf(f, "%2d: ", i/2);
462          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
463            cb->offset_vector[i+1] - cb->offset_vector[i], f);
464          fprintf(f, "\n");
465          }
466        }
467      }
468    
469    /* Re-print the subject in canonical form, the first time or if giving full
470    details. On subsequent calls in the same match, we use pchars just to find the
471    printed lengths of the substrings. */
472    
473    if (f != NULL) fprintf(f, "--->");
474    
475    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
476    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
477      cb->current_position - cb->start_match, f);
478    
479    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
480    
481    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
482      cb->subject_length - cb->current_position, f);
483    
484    if (f != NULL) fprintf(f, "\n");
485    
486    /* Always print appropriate indicators, with callout number if not already
487    shown. For automatic callouts, show the pattern offset. */
488    
489    if (cb->callout_number == 255)
490      {
491      fprintf(outfile, "%+3d ", cb->pattern_position);
492      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
493      }
494    else
495      {
496      if (callout_extra) fprintf(outfile, "    ");
497        else fprintf(outfile, "%3d ", cb->callout_number);
498      }
499    
500    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
501    fprintf(outfile, "^");
502    
503    if (post_start > 0)
504      {
505      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
506      fprintf(outfile, "^");
507      }
508    
509    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
510      fprintf(outfile, " ");
511    
512    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
513      pbuffer + cb->pattern_position);
514    
515    fprintf(outfile, "\n");
516    first_callout = 0;
517    
518    if (cb->callout_data != NULL)
519      {
520      int callout_data = *((int *)(cb->callout_data));
521      if (callout_data != 0)
522        {
523        fprintf(outfile, "Callout data = %d\n", callout_data);
524        return callout_data;
525        }
526      }
527    
528    return (cb->callout_number != callout_fail_id)? 0 :
529           (++callout_count >= callout_fail_count)? 1 : 0;
530    }
531    
532    
533    /*************************************************
534    *            Local malloc functions              *
535    *************************************************/
536    
537  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
538  compiled re. */  compiled re. */
539    
540  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
541  {  {
542  if (log_store)  void *block = malloc(size);
543    fprintf(outfile, "Memory allocation (code space): %d\n",  gotten_store = size;
544      (int)((int)size - offsetof(real_pcre, code[0])));  if (show_malloc)
545  return malloc(size);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
546    return block;
547    }
548    
549    static void new_free(void *block)
550    {
551    if (show_malloc)
552      fprintf(outfile, "free             %p\n", block);
553    free(block);
554  }  }
555    
556    
557    /* For recursion malloc/free, to test stacking calls */
558    
559    static void *stack_malloc(size_t size)
560    {
561    void *block = malloc(size);
562    if (show_malloc)
563      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
564    return block;
565    }
566    
567    static void stack_free(void *block)
568    {
569    if (show_malloc)
570      fprintf(outfile, "stack_free       %p\n", block);
571    free(block);
572    }
573    
574    
575    /*************************************************
576    *          Call pcre_fullinfo()                  *
577    *************************************************/
578    
579    /* Get one piece of information from the pcre_fullinfo() function */
580    
581    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
582    {
583    int rc;
584    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
585      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
586    }
587    
588    
589    
590    /*************************************************
591    *         Byte flipping function                 *
592    *************************************************/
593    
594    static unsigned long int
595    byteflip(unsigned long int value, int n)
596    {
597    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
598    return ((value & 0x000000ff) << 24) |
599           ((value & 0x0000ff00) <<  8) |
600           ((value & 0x00ff0000) >>  8) |
601           ((value & 0xff000000) >> 24);
602    }
603    
604    
605    
606    
607    /*************************************************
608    *        Check match or recursion limit          *
609    *************************************************/
610    
611    static int
612    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
613      int start_offset, int options, int *use_offsets, int use_size_offsets,
614      int flag, unsigned long int *limit, int errnumber, const char *msg)
615    {
616    int count;
617    int min = 0;
618    int mid = 64;
619    int max = -1;
620    
621    extra->flags |= flag;
622    
623    for (;;)
624      {
625      *limit = mid;
626    
627      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
628        use_offsets, use_size_offsets);
629    
630      if (count == errnumber)
631        {
632        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
633        min = mid;
634        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
635        }
636    
637      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
638                             count == PCRE_ERROR_PARTIAL)
639        {
640        if (mid == min + 1)
641          {
642          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
643          break;
644          }
645        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
646        max = mid;
647        mid = (min + mid)/2;
648        }
649      else break;    /* Some other error */
650      }
651    
652    extra->flags &= ~flag;
653    return count;
654    }
655    
656    
657    
658    /*************************************************
659    *         Check newline indicator                *
660    *************************************************/
661    
662    /* This is used both at compile and run-time to check for <xxx> escapes, where
663    xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
664    
665    Arguments:
666      p           points after the leading '<'
667      f           file for error message
668    
669    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
670    */
671    
672    static int
673    check_newline(uschar *p, FILE *f)
674    {
675    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
676    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
677    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
678    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
679    fprintf(f, "Unknown newline type at: <%s\n", p);
680    return 0;
681    }
682    
683    
684    
685    /*************************************************
686    *             Usage function                     *
687    *************************************************/
688    
/* Write the command-line summary to stdout. The -dfa and -p lines are
included only when the corresponding feature has not been compiled out
(NODFA / NOPOSIX). None of the text contains conversion specifications, so it
is written with fputs() rather than printf(). */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
714    
715    
716    
717    /*************************************************
718    *                Main Program                    *
719    *************************************************/
720    
721  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
722  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 293  int options = 0; Line 729  int options = 0;
729  int study_options = 0;  int study_options = 0;
730  int op = 1;  int op = 1;
731  int timeit = 0;  int timeit = 0;
732    int timeitm = 0;
733  int showinfo = 0;  int showinfo = 0;
734  int showstore = 0;  int showstore = 0;
735    int quiet = 0;
736    int size_offsets = 45;
737    int size_offsets_max;
738    int *offsets = NULL;
739    #if !defined NOPOSIX
740  int posix = 0;  int posix = 0;
741    #endif
742  int debug = 0;  int debug = 0;
743  int done = 0;  int done = 0;
744  unsigned char buffer[30000];  int all_use_dfa = 0;
745  unsigned char dbuffer[1024];  int yield = 0;
746    int stack_size;
747    
748    /* These vectors store, end-to-end, a list of captured substring names. Assume
749    that 1024 is plenty long enough for the few names we'll be testing. */
750    
751    uschar copynames[1024];
752    uschar getnames[1024];
753    
754    uschar *copynamesptr;
755    uschar *getnamesptr;
756    
757    /* Get buffers from malloc() so that Electric Fence will check their misuse
758    when I am debugging. They grow automatically when very long lines are read. */
759    
760  /* Static so that new_malloc can use it. */  buffer = (unsigned char *)malloc(buffer_size);
761    dbuffer = (unsigned char *)malloc(buffer_size);
762    pbuffer = (unsigned char *)malloc(buffer_size);
763    
764    /* The outfile variable is static so that new_malloc can use it. */
765    
766  outfile = stdout;  outfile = stdout;
767    
768    /* The following  _setmode() stuff is some Windows magic that tells its runtime
769    library to translate CRLF into a single LF character. At least, that's what
770    I've been told: never having used Windows I take this all on trust. Originally
771    it set 0x8000, but then I was advised that _O_BINARY was better. */
772    
773    #if defined(_WIN32) || defined(WIN32)
774    _setmode( _fileno( stdout ), _O_BINARY );
775    #endif
776    
777  /* Scan options */  /* Scan options */
778    
779  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
780    {    {
781      unsigned char *endptr;
782    
783    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
784      showstore = 1;      showstore = 1;
785    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
786      else if (strcmp(argv[op], "-b") == 0) debug = 1;
787    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
788    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
789    #if !defined NODFA
790      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
791    #endif
792      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
793          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
794            *endptr == 0))
795        {
796        op++;
797        argc--;
798        }
799      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
800        {
801        int both = argv[op][2] == 0;
802        int temp;
803        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
804                         *endptr == 0))
805          {
806          timeitm = temp;
807          op++;
808          argc--;
809          }
810        else timeitm = LOOPREPEAT;
811        if (both) timeit = timeitm;
812        }
813      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
814          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
815            *endptr == 0))
816        {
817    #if defined(_WIN32) || defined(WIN32)
818        printf("PCRE: -S not supported on this OS\n");
819        exit(1);
820    #else
821        int rc;
822        struct rlimit rlim;
823        getrlimit(RLIMIT_STACK, &rlim);
824        rlim.rlim_cur = stack_size * 1024 * 1024;
825        rc = setrlimit(RLIMIT_STACK, &rlim);
826        if (rc != 0)
827          {
828        printf("PCRE: setrlimit() failed with error %d\n", rc);
829        exit(1);
830          }
831        op++;
832        argc--;
833    #endif
834        }
835    #if !defined NOPOSIX
836    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
837    #endif
838      else if (strcmp(argv[op], "-C") == 0)
839        {
840        int rc;
841        printf("PCRE version %s\n", pcre_version());
842        printf("Compiled with\n");
843        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
844        printf("  %sUTF-8 support\n", rc? "" : "No ");
845        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
846        printf("  %sUnicode properties support\n", rc? "" : "No ");
847        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
848        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
849          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
850          (rc == -1)? "ANY" : "???");
851        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
852        printf("  Internal link size = %d\n", rc);
853        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
854        printf("  POSIX malloc threshold = %d\n", rc);
855        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
856        printf("  Default match limit = %d\n", rc);
857        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
858        printf("  Default recursion depth limit = %d\n", rc);
859        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
860        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
861        goto EXIT;
862        }
863      else if (strcmp(argv[op], "-help") == 0 ||
864               strcmp(argv[op], "--help") == 0)
865        {
866        usage();
867        goto EXIT;
868        }
869    else    else
870      {      {
871      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
872      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
873      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
874             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
875      }      }
876    op++;    op++;
877    argc--;    argc--;
878    }    }
879    
880    /* Get the store for the offsets vector, and remember what it was */
881    
882    size_offsets_max = size_offsets;
883    offsets = (int *)malloc(size_offsets_max * sizeof(int));
884    if (offsets == NULL)
885      {
886      printf("** Failed to get %d bytes of memory for offsets vector\n",
887        size_offsets_max * sizeof(int));
888      yield = 1;
889      goto EXIT;
890      }
891    
892  /* Sort out the input and output files */  /* Sort out the input and output files */
893    
894  if (argc > 1)  if (argc > 1)
895    {    {
896    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
897    if (infile == NULL)    if (infile == NULL)
898      {      {
899      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
900      return 1;      yield = 1;
901        goto EXIT;
902      }      }
903    }    }
904    
905  if (argc > 2)  if (argc > 2)
906    {    {
907    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
908    if (outfile == NULL)    if (outfile == NULL)
909      {      {
910      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
911      return 1;      yield = 1;
912        goto EXIT;
913      }      }
914    }    }
915    
916  /* Set alternative malloc function */  /* Set alternative malloc function */
917    
918  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
919    pcre_free = new_free;
920    pcre_stack_malloc = stack_malloc;
921    pcre_stack_free = stack_free;
922    
923  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
924    
925  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
926    
927  /* Main loop */  /* Main loop */
928    
# Line 366  while (!done) Line 930  while (!done)
930    {    {
931    pcre *re = NULL;    pcre *re = NULL;
932    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
933    
934    #if !defined NOPOSIX  /* There are still compilers that require no indent */
935    regex_t preg;    regex_t preg;
936      int do_posix = 0;
937    #endif
938    
939    const char *error;    const char *error;
940    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
941    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
942      const unsigned char *tables = NULL;
943      unsigned long int true_size, true_study_size = 0;
944      size_t size, regex_gotten_store;
945    int do_study = 0;    int do_study = 0;
946    int do_debug = debug;    int do_debug = debug;
947      int debug_lengths = 1;
948    int do_G = 0;    int do_G = 0;
949    int do_g = 0;    int do_g = 0;
950    int do_showinfo = showinfo;    int do_showinfo = showinfo;
951    int do_showrest = 0;    int do_showrest = 0;
952    int do_posix = 0;    int do_flip = 0;
953    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
954    
955      use_utf8 = 0;
956    
957    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
958    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
959    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
960      fflush(outfile);
961    
962    p = buffer;    p = buffer;
963    while (isspace(*p)) p++;    while (isspace(*p)) p++;
964    if (*p == 0) continue;    if (*p == 0) continue;
965    
966    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
967    complete, read more. */  
968      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
969        {
970        unsigned long int magic, get_options;
971        uschar sbuf[8];
972        FILE *f;
973    
974        p++;
975        pp = p + (int)strlen((char *)p);
976        while (isspace(pp[-1])) pp--;
977        *pp = 0;
978    
979        f = fopen((char *)p, "rb");
980        if (f == NULL)
981          {
982          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
983          continue;
984          }
985    
986        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
987    
988        true_size =
989          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
990        true_study_size =
991          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
992    
993        re = (real_pcre *)new_malloc(true_size);
994        regex_gotten_store = gotten_store;
995    
996        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
997    
998        magic = ((real_pcre *)re)->magic_number;
999        if (magic != MAGIC_NUMBER)
1000          {
1001          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1002            {
1003            do_flip = 1;
1004            }
1005          else
1006            {
1007            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1008            fclose(f);
1009            continue;
1010            }
1011          }
1012    
1013        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1014          do_flip? " (byte-inverted)" : "", p);
1015    
1016        /* Need to know if UTF-8 for printing data strings */
1017    
1018        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1019        use_utf8 = (get_options & PCRE_UTF8) != 0;
1020    
1021        /* Now see if there is any following study data */
1022    
1023        if (true_study_size != 0)
1024          {
1025          pcre_study_data *psd;
1026    
1027          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1028          extra->flags = PCRE_EXTRA_STUDY_DATA;
1029    
1030          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1031          extra->study_data = psd;
1032    
1033          if (fread(psd, 1, true_study_size, f) != true_study_size)
1034            {
1035            FAIL_READ:
1036            fprintf(outfile, "Failed to read data from %s\n", p);
1037            if (extra != NULL) new_free(extra);
1038            if (re != NULL) new_free(re);
1039            fclose(f);
1040            continue;
1041            }
1042          fprintf(outfile, "Study data loaded from %s\n", p);
1043          do_study = 1;     /* To get the data output if requested */
1044          }
1045        else fprintf(outfile, "No study data\n");
1046    
1047        fclose(f);
1048        goto SHOW_INFO;
1049        }
1050    
1051      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1052      the pattern; if it isn't complete, read more. */
1053    
1054    delimiter = *p++;    delimiter = *p++;
1055    
# Line 399  while (!done) Line 1060  while (!done)
1060      }      }
1061    
1062    pp = p;    pp = p;
1063      poffset = p - buffer;
1064    
1065    for(;;)    for(;;)
1066      {      {
# Line 409  while (!done) Line 1071  while (!done)
1071        pp++;        pp++;
1072        }        }
1073      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1074      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1075      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1076        {        {
1077        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1078        done = 1;        done = 1;
# Line 427  while (!done) Line 1081  while (!done)
1081      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1082      }      }
1083    
1084      /* The buffer may have moved while being extended; reset the start of data
1085      pointer to the correct relative point in the buffer. */
1086    
1087      p = buffer + poffset;
1088    
1089    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1090    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1091    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1092    
1093    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1094    
1095    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1096      for callouts. */
1097    
1098    *pp++ = 0;    *pp++ = 0;
1099      strcpy((char *)pbuffer, (char *)p);
1100    
1101    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1102    
# Line 447  while (!done) Line 1108  while (!done)
1108      {      {
1109      switch (*pp++)      switch (*pp++)
1110        {        {
1111          case 'f': options |= PCRE_FIRSTLINE; break;
1112        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1113        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1114        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 455  while (!done) Line 1117  while (!done)
1117    
1118        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1119        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1120          case 'B': do_debug = 1; break;
1121          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1122        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1123        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1124          case 'F': do_flip = 1; break;
1125        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1126        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1127          case 'J': options |= PCRE_DUPNAMES; break;
1128        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1129          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1130    
1131    #if !defined NOPOSIX
1132        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1133    #endif
1134    
1135        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1136        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1137        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1138          case 'Z': debug_lengths = 0;
1139          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1140          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1141    
1142        case 'L':        case 'L':
1143        ppp = pp;        ppp = pp;
1144        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1145          /* The '0' test is just in case this is an unterminated line. */
1146          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1147        *ppp = 0;        *ppp = 0;
1148        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1149          {          {
1150          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1151          goto SKIP_DATA;          goto SKIP_DATA;
1152          }          }
1153          locale_set = 1;
1154        tables = pcre_maketables();        tables = pcre_maketables();
1155        pp = ppp;        pp = ppp;
1156        break;        break;
1157    
1158        case '\n': case ' ': break;        case '>':
1159          to_file = pp;
1160          while (*pp != 0) pp++;
1161          while (isspace(pp[-1])) pp--;
1162          *pp = 0;
1163          break;
1164    
1165          case '<':
1166            {
1167            int x = check_newline(pp, outfile);
1168            if (x == 0) goto SKIP_DATA;
1169            options |= x;
1170            while (*pp++ != '>');
1171            }
1172          break;
1173    
1174          case '\r':                      /* So that it works in Windows */
1175          case '\n':
1176          case ' ':
1177          break;
1178    
1179        default:        default:
1180        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1181        goto SKIP_DATA;        goto SKIP_DATA;
# Line 489  while (!done) Line 1186  while (!done)
1186    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1187    local character tables. */    local character tables. */
1188    
1189    #if !defined NOPOSIX
1190    if (posix || do_posix)    if (posix || do_posix)
1191      {      {
1192      int rc;      int rc;
1193      int cflags = 0;      int cflags = 0;
1194    
1195      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1196      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1197        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1198        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1199        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1200    
1201      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1202    
1203      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 502  while (!done) Line 1205  while (!done)
1205    
1206      if (rc != 0)      if (rc != 0)
1207        {        {
1208        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1209        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1210        goto SKIP_DATA;        goto SKIP_DATA;
1211        }        }
# Line 511  while (!done) Line 1214  while (!done)
1214    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1215    
1216    else    else
1217    #endif  /* !defined NOPOSIX */
1218    
1219      {      {
1220      if (timeit)      if (timeit > 0)
1221        {        {
1222        register int i;        register int i;
1223        clock_t time_taken;        clock_t time_taken;
1224        clock_t start_time = clock();        clock_t start_time = clock();
1225        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1226          {          {
1227          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1228          if (re != NULL) free(re);          if (re != NULL) free(re);
1229          }          }
1230        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1231        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1232          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1233          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1234        }        }
1235    
1236      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 541  while (!done) Line 1246  while (!done)
1246          {          {
1247          for (;;)          for (;;)
1248            {            {
1249            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1250              {              {
1251              done = 1;              done = 1;
1252              goto CONTINUE;              goto CONTINUE;
# Line 552  while (!done) Line 1257  while (!done)
1257            }            }
1258          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1259          }          }
1260        goto CONTINUE;        goto CONTINUE;
1261        }        }
1262    
1263        /* Compilation succeeded; print data if required. There are now two
1264        info-returning functions. The old one has a limited interface and
1265        returns only limited data. Check that it agrees with the newer one. */
1266    
1267        if (log_store)
1268          fprintf(outfile, "Memory allocation (code space): %d\n",
1269            (int)(gotten_store -
1270                  sizeof(real_pcre) -
1271                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1272    
1273        /* Extract the size for possible writing before possibly flipping it,
1274        and remember the store that was got. */
1275    
1276        true_size = ((real_pcre *)re)->size;
1277        regex_gotten_store = gotten_store;
1278    
1279        /* If /S was present, study the regexp to generate additional info to
1280        help with the matching. */
1281    
1282        if (do_study)
1283          {
1284          if (timeit > 0)
1285            {
1286            register int i;
1287            clock_t time_taken;
1288            clock_t start_time = clock();
1289            for (i = 0; i < timeit; i++)
1290              extra = pcre_study(re, study_options, &error);
1291            time_taken = clock() - start_time;
1292            if (extra != NULL) free(extra);
1293            fprintf(outfile, "  Study time %.4f milliseconds\n",
1294              (((double)time_taken * 1000.0) / (double)timeit) /
1295                (double)CLOCKS_PER_SEC);
1296            }
1297          extra = pcre_study(re, study_options, &error);
1298          if (error != NULL)
1299            fprintf(outfile, "Failed to study: %s\n", error);
1300          else if (extra != NULL)
1301            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1302          }
1303    
1304        /* If the 'F' option was present, we flip the bytes of all the integer
1305        fields in the regex data block and the study block. This is to make it
1306        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1307        compiled on a different architecture. */
1308    
1309        if (do_flip)
1310          {
1311          real_pcre *rre = (real_pcre *)re;
1312          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1313          rre->size = byteflip(rre->size, sizeof(rre->size));
1314          rre->options = byteflip(rre->options, sizeof(rre->options));
1315          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1316          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1317          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1318          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1319          rre->name_table_offset = byteflip(rre->name_table_offset,
1320            sizeof(rre->name_table_offset));
1321          rre->name_entry_size = byteflip(rre->name_entry_size,
1322            sizeof(rre->name_entry_size));
1323          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1324    
1325          if (extra != NULL)
1326            {
1327            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1328            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1329            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1330            }
1331          }
1332    
1333        /* Extract information from the compiled data if required */
1334    
1335        SHOW_INFO:
1336    
1337        if (do_debug)
1338          {
1339          fprintf(outfile, "------------------------------------------------------------------\n");
1340          pcre_printint(re, outfile, debug_lengths);
1341          }
1342    
1343        if (do_showinfo)
1344          {
1345          unsigned long int get_options, all_options;
1346    #if !defined NOINFOCHECK
1347          int old_first_char, old_options, old_count;
1348    #endif
1349          int count, backrefmax, first_char, need_char;
1350          int nameentrysize, namecount;
1351          const uschar *nametable;
1352    
1353          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1354          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1355          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1356          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1357          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1358          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1359          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1360          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1361          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1362    
1363    #if !defined NOINFOCHECK
1364          old_count = pcre_info(re, &old_options, &old_first_char);
1365          if (count < 0) fprintf(outfile,
1366            "Error %d from pcre_info()\n", count);
1367          else
1368            {
1369            if (old_count != count) fprintf(outfile,
1370              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1371                old_count);
1372    
1373            if (old_first_char != first_char) fprintf(outfile,
1374              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1375                first_char, old_first_char);
1376    
1377            if (old_options != (int)get_options) fprintf(outfile,
1378              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1379                get_options, old_options);
1380            }
1381    #endif
1382    
1383          if (size != regex_gotten_store) fprintf(outfile,
1384            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1385            (int)size, (int)regex_gotten_store);
1386    
1387          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1388          if (backrefmax > 0)
1389            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1390    
1391          if (namecount > 0)
1392            {
1393            fprintf(outfile, "Named capturing subpatterns:\n");
1394            while (namecount-- > 0)
1395              {
1396              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1397                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1398                GET2(nametable, 0));
1399              nametable += nameentrysize;
1400              }
1401            }
1402    
1403      /* Compilation succeeded; print data if required */        /* The NOPARTIAL bit is a private bit in the options, so we have
1404          to fish it out via our back door */
1405    
1406      if (do_showinfo)        all_options = ((real_pcre *)re)->options;
1407        {        if (do_flip)
1408        int first_char, count;          {
1409            all_options = byteflip(all_options, sizeof(all_options));
1410             }
1411    
1412        if (do_debug) print_internals(re, outfile);        if ((all_options & PCRE_NOPARTIAL) != 0)
1413            fprintf(outfile, "Partial matching not supported\n");
1414    
1415        count = pcre_info(re, &options, &first_char);        if (get_options == 0) fprintf(outfile, "No options\n");
1416        if (count < 0) fprintf(outfile,          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1417          "Error %d while reading info\n", count);            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1418              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1419              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1420              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1421              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1422              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1423              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1424              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1425              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1426              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1427              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1428              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1429              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1430    
1431          switch (get_options & PCRE_NEWLINE_BITS)
1432            {
1433            case PCRE_NEWLINE_CR:
1434            fprintf(outfile, "Forced newline sequence: CR\n");
1435            break;
1436    
1437            case PCRE_NEWLINE_LF:
1438            fprintf(outfile, "Forced newline sequence: LF\n");
1439            break;
1440    
1441            case PCRE_NEWLINE_CRLF:
1442            fprintf(outfile, "Forced newline sequence: CRLF\n");
1443            break;
1444    
1445            case PCRE_NEWLINE_ANY:
1446            fprintf(outfile, "Forced newline sequence: ANY\n");
1447            break;
1448    
1449            default:
1450            break;
1451            }
1452    
1453          if (first_char == -1)
1454            {
1455            fprintf(outfile, "First char at start or follows newline\n");
1456            }
1457          else if (first_char < 0)
1458            {
1459            fprintf(outfile, "No first char\n");
1460            }
1461        else        else
1462          {          {
1463          fprintf(outfile, "Identifying subpattern count = %d\n", count);          int ch = first_char & 255;
1464          if (options == 0) fprintf(outfile, "No options\n");          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1465            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",            "" : " (caseless)";
1466              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          if (PRINTHEX(ch))
1467              ((options & PCRE_CASELESS) != 0)? " caseless" : "",            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1468              ((options & PCRE_EXTENDED) != 0)? " extended" : "",          else
1469              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1470              ((options & PCRE_DOTALL) != 0)? " dotall" : "",          }
1471              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
1472              ((options & PCRE_EXTRA) != 0)? " extra" : "",        if (need_char < 0)
1473              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");          {
1474          if (first_char == -1)          fprintf(outfile, "No need char\n");
1475            {          }
1476            fprintf(outfile, "First char at start or follows \\n\n");        else
1477            }          {
1478          else if (first_char < 0)          int ch = need_char & 255;
1479            {          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1480            fprintf(outfile, "No first char\n");            "" : " (caseless)";
1481            }          if (PRINTHEX(ch))
1482              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1483            else
1484              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1485            }
1486    
1487          /* Don't output study size; at present it is in any case a fixed
1488          value, but it varies, depending on the computer architecture, and
1489          so messes up the test suite. (And with the /F option, it might be
1490          flipped.) */
1491    
1492          if (do_study)
1493            {
1494            if (extra == NULL)
1495              fprintf(outfile, "Study returned NULL\n");
1496          else          else
1497            {            {
1498            if (isprint(first_char))            uschar *start_bits = NULL;
1499              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1500    
1501              if (start_bits == NULL)
1502                fprintf(outfile, "No starting byte set\n");
1503            else            else
1504              fprintf(outfile, "First char = %d\n", first_char);              {
1505                int i;
1506                int c = 24;
1507                fprintf(outfile, "Starting byte set: ");
1508                for (i = 0; i < 256; i++)
1509                  {
1510                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1511                    {
1512                    if (c > 75)
1513                      {
1514                      fprintf(outfile, "\n  ");
1515                      c = 2;
1516                      }
1517                    if (PRINTHEX(i) && i != ' ')
1518                      {
1519                      fprintf(outfile, "%c ", i);
1520                      c += 2;
1521                      }
1522                    else
1523                      {
1524                      fprintf(outfile, "\\x%02x ", i);
1525                      c += 5;
1526                      }
1527                    }
1528                  }
1529                fprintf(outfile, "\n");
1530                }
1531            }            }
1532          }          }
1533        }        }
1534    
1535      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1536      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1537        the study length, in big-endian order. */
1538    
1539      if (do_study)      if (to_file != NULL)
1540        {        {
1541        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1542          if (f == NULL)
1543          {          {
1544          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
1545          }          }
1546          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (do_showinfo)  
1547          {          {
1548          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1549          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (true_size >> 24)  & 255;
1550            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1551            sbuf[2] = (true_size >>  8)  & 255;
1552            sbuf[3] = (true_size)  & 255;
1553    
1554            sbuf[4] = (true_study_size >> 24)  & 255;
1555            sbuf[5] = (true_study_size >> 16)  & 255;
1556            sbuf[6] = (true_study_size >>  8)  & 255;
1557            sbuf[7] = (true_study_size)  & 255;
1558    
1559            if (fwrite(sbuf, 1, 8, f) < 8 ||
1560                fwrite(re, 1, true_size, f) < true_size)
1561              {
1562              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1563              }
1564          else          else
1565            {            {
1566            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1567            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1568              {              {
1569              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1570                    true_study_size)
1571                {                {
1572                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1573                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1574                }                }
1575                else fprintf(outfile, "Study data written to %s\n", to_file);
1576    
1577              }              }
           fprintf(outfile, "\n");  
1578            }            }
1579            fclose(f);
1580          }          }
1581    
1582          new_free(re);
1583          if (extra != NULL) new_free(extra);
1584          if (tables != NULL) new_free((void *)tables);
1585          continue;  /* With next regex */
1586        }        }
1587      }      }        /* End of non-POSIX compile */
1588    
1589    /* Read data lines and test them */    /* Read data lines and test them */
1590    
1591    for (;;)    for (;;)
1592      {      {
1593      unsigned char *q;      uschar *q;
1594      unsigned char *bptr = dbuffer;      uschar *bptr = dbuffer;
1595        int *use_offsets = offsets;
1596        int use_size_offsets = size_offsets;
1597        int callout_data = 0;
1598        int callout_data_set = 0;
1599      int count, c;      int count, c;
1600      int copystrings = 0;      int copystrings = 0;
1601        int find_match_limit = 0;
1602      int getstrings = 0;      int getstrings = 0;
1603      int getlist = 0;      int getlist = 0;
1604        int gmatched = 0;
1605      int start_offset = 0;      int start_offset = 0;
1606      int offsets[45];      int g_notempty = 0;
1607      int size_offsets = sizeof(offsets)/sizeof(int);      int use_dfa = 0;
1608    
1609      options = 0;      options = 0;
1610    
1611      if (infile == stdin) printf("data> ");      *copynames = 0;
1612      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1613    
1614        copynamesptr = copynames;
1615        getnamesptr = getnames;
1616    
1617        pcre_callout = callout;
1618        first_callout = 1;
1619        callout_extra = 0;
1620        callout_count = 0;
1621        callout_fail_count = 999999;
1622        callout_fail_id = -1;
1623        show_malloc = 0;
1624    
1625        if (extra != NULL) extra->flags &=
1626          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1627    
1628        len = 0;
1629        for (;;)
1630        {        {
1631        done = 1;        if (infile == stdin) printf("data> ");
1632        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1633            {
1634            if (len > 0) break;
1635            done = 1;
1636            goto CONTINUE;
1637            }
1638          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1639          len = (int)strlen((char *)buffer);
1640          if (buffer[len-1] == '\n') break;
1641        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1642    
     len = (int)strlen((char *)buffer);  
1643      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1644      buffer[len] = 0;      buffer[len] = 0;
1645      if (len == 0) break;      if (len == 0) break;
# Line 699  while (!done) Line 1652  while (!done)
1652        {        {
1653        int i = 0;        int i = 0;
1654        int n = 0;        int n = 0;
1655    
1656        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1657          {          {
1658          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 715  while (!done) Line 1669  while (!done)
1669          c -= '0';          c -= '0';
1670          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1671            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1672    
1673    #if !defined NOUTF8
1674            if (use_utf8 && c > 255)
1675              {
1676              unsigned char buff8[8];
1677              int ii, utn;
1678              utn = ord2utf8(c, buff8);
1679              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1680              c = buff8[ii];   /* Last byte */
1681              }
1682    #endif
1683          break;          break;
1684    
1685          case 'x':          case 'x':
1686    
1687            /* Handle \x{..} specially - new Perl thing for utf8 */
1688    
1689    #if !defined NOUTF8
1690            if (*p == '{')
1691              {
1692              unsigned char *pt = p;
1693              c = 0;
1694              while (isxdigit(*(++pt)))
1695                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1696              if (*pt == '}')
1697                {
1698                unsigned char buff8[8];
1699                int ii, utn;
1700                utn = ord2utf8(c, buff8);
1701                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1702                c = buff8[ii];   /* Last byte */
1703                p = pt + 1;
1704                break;
1705                }
1706              /* Not correct form; fall through */
1707              }
1708    #endif
1709    
1710            /* Ordinary \x */
1711    
1712          c = 0;          c = 0;
1713          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1714            {            {
# Line 726  while (!done) Line 1717  while (!done)
1717            }            }
1718          break;          break;
1719    
1720          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1721          p--;          p--;
1722          continue;          continue;
1723    
1724            case '>':
1725            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1726            continue;
1727    
1728          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1729          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1730          continue;          continue;
# Line 739  while (!done) Line 1734  while (!done)
1734          continue;          continue;
1735    
1736          case 'C':          case 'C':
1737          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1738          copystrings |= 1 << n;            {
1739              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1740              copystrings |= 1 << n;
1741              }
1742            else if (isalnum(*p))
1743              {
1744              uschar *npp = copynamesptr;
1745              while (isalnum(*p)) *npp++ = *p++;
1746              *npp++ = 0;
1747              *npp = 0;
1748              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1749              if (n < 0)
1750                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1751              copynamesptr = npp;
1752              }
1753            else if (*p == '+')
1754              {
1755              callout_extra = 1;
1756              p++;
1757              }
1758            else if (*p == '-')
1759              {
1760              pcre_callout = NULL;
1761              p++;
1762              }
1763            else if (*p == '!')
1764              {
1765              callout_fail_id = 0;
1766              p++;
1767              while(isdigit(*p))
1768                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1769              callout_fail_count = 0;
1770              if (*p == '!')
1771                {
1772                p++;
1773                while(isdigit(*p))
1774                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1775                }
1776              }
1777            else if (*p == '*')
1778              {
1779              int sign = 1;
1780              callout_data = 0;
1781              if (*(++p) == '-') { sign = -1; p++; }
1782              while(isdigit(*p))
1783                callout_data = callout_data * 10 + *p++ - '0';
1784              callout_data *= sign;
1785              callout_data_set = 1;
1786              }
1787            continue;
1788    
1789    #if !defined NODFA
1790            case 'D':
1791    #if !defined NOPOSIX
1792            if (posix || do_posix)
1793              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1794            else
1795    #endif
1796              use_dfa = 1;
1797            continue;
1798    
1799            case 'F':
1800            options |= PCRE_DFA_SHORTEST;
1801          continue;          continue;
1802    #endif
1803    
1804          case 'G':          case 'G':
1805          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1806          getstrings |= 1 << n;            {
1807              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1808              getstrings |= 1 << n;
1809              }
1810            else if (isalnum(*p))
1811              {
1812              uschar *npp = getnamesptr;
1813              while (isalnum(*p)) *npp++ = *p++;
1814              *npp++ = 0;
1815              *npp = 0;
1816              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1817              if (n < 0)
1818                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1819              getnamesptr = npp;
1820              }
1821          continue;          continue;
1822    
1823          case 'L':          case 'L':
1824          getlist = 1;          getlist = 1;
1825          continue;          continue;
1826    
1827            case 'M':
1828            find_match_limit = 1;
1829            continue;
1830    
1831            case 'N':
1832            options |= PCRE_NOTEMPTY;
1833            continue;
1834    
1835          case 'O':          case 'O':
1836          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1837          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1838              {
1839              size_offsets_max = n;
1840              free(offsets);
1841              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1842              if (offsets == NULL)
1843                {
1844                printf("** Failed to get %d bytes of memory for offsets vector\n",
1845                  size_offsets_max * sizeof(int));
1846                yield = 1;
1847                goto EXIT;
1848                }
1849              }
1850            use_size_offsets = n;
1851            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1852            continue;
1853    
1854            case 'P':
1855            options |= PCRE_PARTIAL;
1856            continue;
1857    
1858            case 'Q':
1859            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1860            if (extra == NULL)
1861              {
1862              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1863              extra->flags = 0;
1864              }
1865            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1866            extra->match_limit_recursion = n;
1867            continue;
1868    
1869            case 'q':
1870            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871            if (extra == NULL)
1872              {
1873              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874              extra->flags = 0;
1875              }
1876            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1877            extra->match_limit = n;
1878            continue;
1879    
1880    #if !defined NODFA
1881            case 'R':
1882            options |= PCRE_DFA_RESTART;
1883            continue;
1884    #endif
1885    
1886            case 'S':
1887            show_malloc = 1;
1888          continue;          continue;
1889    
1890          case 'Z':          case 'Z':
1891          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1892          continue;          continue;
1893    
1894            case '?':
1895            options |= PCRE_NO_UTF8_CHECK;
1896            continue;
1897    
1898            case '<':
1899              {
1900              int x = check_newline(p, outfile);
1901              if (x == 0) goto NEXT_DATA;
1902              options |= x;
1903              while (*p++ != '>');
1904              }
1905            continue;
1906          }          }
1907        *q++ = c;        *q++ = c;
1908        }        }
1909      *q = 0;      *q = 0;
1910      len = q - dbuffer;      len = q - dbuffer;
1911    
1912        if ((all_use_dfa || use_dfa) && find_match_limit)
1913          {
1914          printf("**Match limit not relevant for DFA matching: ignored\n");
1915          find_match_limit = 0;
1916          }
1917    
1918      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1919      support timing. */      support timing or playing with the match limit or callout data. */
1920    
1921    #if !defined NOPOSIX
1922      if (posix || do_posix)      if (posix || do_posix)
1923        {        {
1924        int rc;        int rc;
1925        int eflags = 0;        int eflags = 0;
1926        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1927          if (use_size_offsets > 0)
1928            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1929        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1930        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1931    
1932        rc = regexec(&preg, (unsigned char *)bptr,        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         sizeof(pmatch)/sizeof(regmatch_t), pmatch, eflags);  
1933    
1934        if (rc != 0)        if (rc != 0)
1935          {          {
1936          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1937          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1938          }          }
1939          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1940                  != 0)
1941            {
1942            fprintf(outfile, "Matched with REG_NOSUB\n");
1943            }
1944        else        else
1945          {          {
1946          size_t i;          size_t i;
1947          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1948            {            {
1949            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1950              {              {
1951              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1952              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1953                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1954              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1955              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1956                {                {
1957                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1958                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1959                    outfile);
1960                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1961                }                }
1962              }              }
1963            }            }
1964          }          }
1965          free(pmatch);
1966        }        }
1967    
1968      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
1969    
1970      else for (;;)      else
1971    #endif  /* !defined NOPOSIX */
1972    
1973        for (;; gmatched++)    /* Loop for /g or /G */
1974        {        {
1975        if (timeit)        if (timeitm > 0)
1976          {          {
1977          register int i;          register int i;
1978          clock_t time_taken;          clock_t time_taken;
1979          clock_t start_time = clock();          clock_t start_time = clock();
1980          for (i = 0; i < LOOPREPEAT; i++)  
1981    #if !defined NODFA
1982            if (all_use_dfa || use_dfa)
1983              {
1984              int workspace[1000];
1985              for (i = 0; i < timeitm; i++)
1986                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1987                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1988                  sizeof(workspace)/sizeof(int));
1989              }
1990            else
1991    #endif
1992    
1993            for (i = 0; i < timeitm; i++)
1994            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1995              (do_g? start_offset : 0), options, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1996    
1997          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1998          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
1999            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2000            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2001            }
2002    
2003          /* If find_match_limit is set, we want to do repeated matches with
2004          varying limits in order to find the minimum value for the match limit and
2005          for the recursion limit. */
2006    
2007          if (find_match_limit)
2008            {
2009            if (extra == NULL)
2010              {
2011              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2012              extra->flags = 0;
2013              }
2014    
2015            (void)check_match_limit(re, extra, bptr, len, start_offset,
2016              options|g_notempty, use_offsets, use_size_offsets,
2017              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2018              PCRE_ERROR_MATCHLIMIT, "match()");
2019    
2020            count = check_match_limit(re, extra, bptr, len, start_offset,
2021              options|g_notempty, use_offsets, use_size_offsets,
2022              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2023              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2024            }
2025    
2026          /* If callout_data is set, use the interface with additional data */
2027    
2028          else if (callout_data_set)
2029            {
2030            if (extra == NULL)
2031              {
2032              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2033              extra->flags = 0;
2034              }
2035            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2036            extra->callout_data = &callout_data;
2037            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2038              options | g_notempty, use_offsets, use_size_offsets);
2039            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2040          }          }
2041    
2042        count = pcre_exec(re, extra, (char *)bptr, len,        /* The normal case is just to do the match once, with the default
2043          (do_g? start_offset : 0), options, offsets, size_offsets);        value of match_limit. */
2044    
2045    #if !defined NODFA
2046          else if (all_use_dfa || use_dfa)
2047            {
2048            int workspace[1000];
2049            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2050              options | g_notempty, use_offsets, use_size_offsets, workspace,
2051              sizeof(workspace)/sizeof(int));
2052            if (count == 0)
2053              {
2054              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2055              count = use_size_offsets/2;
2056              }
2057            }
2058    #endif
2059    
2060        if (count == 0)        else
2061          {          {
2062          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2063          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2064            if (count == 0)
2065              {
2066              fprintf(outfile, "Matched, but too many substrings\n");
2067              count = use_size_offsets/3;
2068              }
2069          }          }
2070    
2071          /* Matched */
2072    
2073        if (count >= 0)        if (count >= 0)
2074          {          {
2075          int i;          int i, maxcount;
2076    
2077    #if !defined NODFA
2078            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2079    #endif
2080              maxcount = use_size_offsets/3;
2081    
2082            /* This is a check against a lunatic return value. */
2083    
2084            if (count > maxcount)
2085              {
2086              fprintf(outfile,
2087                "** PCRE error: returned count %d is too big for offset size %d\n",
2088                count, use_size_offsets);
2089              count = use_size_offsets/3;
2090              if (do_g || do_G)
2091                {
2092                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2093                do_g = do_G = FALSE;        /* Break g/G loop */
2094                }
2095              }
2096    
2097          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2098            {            {
2099            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2100              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2101            else            else
2102              {              {
2103              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2104              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2105                  use_offsets[i+1] - use_offsets[i], outfile);
2106              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2107              if (i == 0)              if (i == 0)
2108                {                {
               start_offset = offsets[1];  
2109                if (do_showrest)                if (do_showrest)
2110                  {                  {
2111                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
2112                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2113                      outfile);
2114                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
2115                  }                  }
2116                }                }
# Line 863  while (!done) Line 2121  while (!done)
2121            {            {
2122            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2123              {              {
2124              char buffer[16];              char copybuffer[256];
2125              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2126                i, buffer, sizeof(buffer));                i, copybuffer, sizeof(copybuffer));
2127              if (rc < 0)              if (rc < 0)
2128                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2129              else              else
2130                fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2131              }              }
2132            }            }
2133    
2134            for (copynamesptr = copynames;
2135                 *copynamesptr != 0;
2136                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2137              {
2138              char copybuffer[256];
2139              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2140                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2141              if (rc < 0)
2142                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2143              else
2144                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2145              }
2146    
2147          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2148            {            {
2149            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2150              {              {
2151              const char *substring;              const char *substring;
2152              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2153                i, &substring);                i, &substring);
2154              if (rc < 0)              if (rc < 0)
2155                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2156              else              else
2157                {                {
2158                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2159                free((void *)substring);                pcre_free_substring(substring);
2160                }                }
2161              }              }
2162            }            }
2163    
2164            for (getnamesptr = getnames;
2165                 *getnamesptr != 0;
2166                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2167              {
2168              const char *substring;
2169              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2170                count, (char *)getnamesptr, &substring);
2171              if (rc < 0)
2172                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2173              else
2174                {
2175                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2176                pcre_free_substring(substring);
2177                }
2178              }
2179    
2180          if (getlist)          if (getlist)
2181            {            {
2182            const char **stringlist;            const char **stringlist;
2183            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2184              &stringlist);              &stringlist);
2185            if (rc < 0)            if (rc < 0)
2186              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 903  while (!done) Line 2190  while (!done)
2190                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2191              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2192                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
2193              free((void *)stringlist);              /* free((void *)stringlist); */
2194                pcre_free_substring_list(stringlist);
2195              }              }
2196            }            }
2197            }
2198    
2199          /* There was a partial match */
2200    
2201          else if (count == PCRE_ERROR_PARTIAL)
2202            {
2203            fprintf(outfile, "Partial match");
2204    #if !defined NODFA
2205            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2206              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2207                bptr + use_offsets[0]);
2208    #endif
2209            fprintf(outfile, "\n");
2210            break;  /* Out of the /g loop */
2211          }          }
2212    
2213          /* Failed to match. If this is a /g or /G loop and we previously set
2214          g_notempty after a null match, this is not necessarily the end.
2215          We want to advance the start offset, and continue. In the case of UTF-8
2216          matching, the advance must be one character, not one byte. Fudge the
2217          offset values to achieve this. We won't be at the end of the string -
2218          that was checked before setting g_notempty. */
2219    
2220        else        else
2221          {          {
2222          if (start_offset == 0)          if (g_notempty != 0)
2223              {
2224              int onechar = 1;
2225              use_offsets[0] = start_offset;
2226              if (use_utf8)
2227                {
2228                while (start_offset + onechar < len)
2229                  {
2230                  int tb = bptr[start_offset+onechar];
2231                  if (tb <= 127) break;
2232                  tb &= 0xc0;
2233                  if (tb != 0 && tb != 0xc0) onechar++;
2234                  }
2235                }
2236              use_offsets[1] = start_offset + onechar;
2237              }
2238            else
2239            {            {
2240            if (count == -1) fprintf(outfile, "No match\n");            if (count == PCRE_ERROR_NOMATCH)
2241              else fprintf(outfile, "Error %d\n", count);              {
2242                if (gmatched == 0) fprintf(outfile, "No match\n");
2243                }
2244              else fprintf(outfile, "Error %d\n", count);
2245              break;  /* Out of the /g loop */
2246            }            }
         start_offset = -1;  
2247          }          }
2248    
2249        if ((!do_g && !do_G) || start_offset <= 0) break;        /* If not /g or /G we are done */
2250        if (do_G)  
2251          if (!do_g && !do_G) break;
2252    
2253          /* If we have matched an empty string, first check to see if we are at
2254          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2255          what Perl's /g option does. This turns out to be rather cunning. First
2256          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2257          same point. If this fails (picked up above) we advance to the next
2258          character. */
2259    
2260          g_notempty = 0;
2261          if (use_offsets[0] == use_offsets[1])
2262          {          {
2263          bptr += start_offset;          if (use_offsets[0] == len) break;
2264          len -= start_offset;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2265          }          }
2266        }  
2267      }        /* For /g, update the start offset, leaving the rest alone */
2268    
2269          if (do_g) start_offset = use_offsets[1];
2270    
2271          /* For /G, update the pointer and length */
2272    
2273          else
2274            {
2275            bptr += use_offsets[1];
2276            len -= use_offsets[1];
2277            }
2278          }  /* End of loop for /g and /G */
2279    
2280        NEXT_DATA: continue;
2281        }    /* End of loop for data lines */
2282    
2283    CONTINUE:    CONTINUE:
2284    
2285    #if !defined NOPOSIX
2286    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2287    if (re != NULL) free(re);  #endif
2288    if (extra != NULL) free(extra);  
2289      if (re != NULL) new_free(re);
2290      if (extra != NULL) new_free(extra);
2291    if (tables != NULL)    if (tables != NULL)
2292      {      {
2293      free((void *)tables);      new_free((void *)tables);
2294      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2295        locale_set = 0;
2296      }      }
2297    }    }
2298    
2299  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2300  return 0;  
2301    EXIT:
2302    
2303    if (infile != NULL && infile != stdin) fclose(infile);
2304    if (outfile != NULL && outfile != stdout) fclose(outfile);
2305    
2306    free(buffer);
2307    free(dbuffer);
2308    free(pbuffer);
2309    free(offsets);
2310    
2311    return yield;
2312  }  }
2313    
2314  /* End */  /* End of pcretest.c */

Legend:
Removed from v.35  
changed lines
  Added in v.123

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12