/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 31 by nigel, Sat Feb 24 21:38:57 2007 UTC revision 107 by ph10, Wed Mar 7 11:02:28 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    
48    /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
69    
70    #define PCRE_SPY        /* For Win32 build, import data, not export */
71    
72    /* We include pcre_internal.h because we need the internal info for displaying
73    the results of pcre_study() and we also need to know about the internal
74    macros, structures, and other internal data values; pcretest has "inside
75    information" compared to a program that strictly follows the PCRE API. */
76    
77    #include "pcre_internal.h"
78    
79    /* We need access to the data tables that PCRE uses. So as not to have to keep
80    two copies, we include the source file here, changing the names of the external
81    symbols to prevent clashes. */
82    
83    #define _pcre_utf8_table1      utf8_table1
84    #define _pcre_utf8_table1_size utf8_table1_size
85    #define _pcre_utf8_table2      utf8_table2
86    #define _pcre_utf8_table3      utf8_table3
87    #define _pcre_utf8_table4      utf8_table4
88    #define _pcre_utt              utt
89    #define _pcre_utt_size         utt_size
90    #define _pcre_OP_lengths       OP_lengths
91    
92  /* Use the internal info for displaying the results of pcre_study(). */  #include "pcre_tables.c"
93    
94  #include "internal.h"  /* We also need the pcre_printint() function for printing out compiled
95    patterns. This function is in a separate file so that it can be included in
96    pcre_compile.c when that module is compiled with debugging enabled.
97    
98    The definition of the macro PRINTABLE, which determines whether to print an
99    output character as-is or as a hex value when showing compiled patterns, is
100    contained in this file. We use it here also, in cases when the locale has not
101    been explicitly changed, so as to get consistent output from systems that
102    differ in their output from isprint() even in the "C" locale. */
103    
104    #include "pcre_printint.src"
105    
106    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107    
108    
109    /* It is possible to compile this test program without including support for
110    testing the POSIX interface, though this is not available via the standard
111    Makefile. */
112    
113    #if !defined NOPOSIX
114  #include "pcreposix.h"  #include "pcreposix.h"
115    #endif
116    
117    /* It is also possible, for the benefit of the version currently imported into
118    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
119    interface to the DFA matcher (NODFA), and without the doublecheck of the old
120    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
121    UTF8 support if PCRE is built without it. */
122    
123    #ifndef SUPPORT_UTF8
124    #ifndef NOUTF8
125    #define NOUTF8
126    #endif
127    #endif
128    
129    
130    /* Other parameters */
131    
132  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
133  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 137 
137  #endif  #endif
138  #endif  #endif
139    
140  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
141    
142    #define LOOPREPEAT 500000
143    
144    /* Static variables */
145    
146  static FILE *outfile;  static FILE *outfile;
147  static int log_store = 0;  static int log_store = 0;
148    static int callout_count;
149    static int callout_extra;
150    static int callout_fail_count;
151    static int callout_fail_id;
152    static int first_callout;
153    static int locale_set = 0;
154    static int show_malloc;
155    static int use_utf8;
156    static size_t gotten_store;
157    
158    /* The buffers grow automatically if very long input lines are encountered. */
159    
160    static int buffer_size = 50000;
161    static uschar *buffer = NULL;
162    static uschar *dbuffer = NULL;
163    static uschar *pbuffer = NULL;
164    
165    
166    
167  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
168  code as contained in pcre.c under the DEBUG macro. */  *        Read or extend an input line            *
169    *************************************************/
170    
171  static const char *OP_names[] = {  /* Input lines are read into buffer, but both patterns and data lines can be
172    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  continued over multiple input lines. In addition, if the buffer fills up, we
173    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  want to automatically expand it so as to be able to handle extremely large
174    "Opt", "^", "$", "Any", "chars", "not",  lines that are needed for certain stress tests. When the input buffer is
175    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  expanded, the other two buffers must also be expanded likewise, and the
176    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  contents of pbuffer, which are a copy of the input for callouts, must be
177    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  preserved (for when expansion happens for a data line). This is not the most
178    "*", "*?", "+", "+?", "?", "??", "{", "{",  optimal way of handling this, but hey, this is just a test program!
179    "class", "Ref",  
180    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  Arguments:
181    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    f            the file to read
182    "Brazero", "Braminzero", "Bra"    start        where in buffer to start (this *must* be within buffer)
183  };  
184    Returns:       pointer to the start of new data
185                   could be a copy of start, or could be moved
186  static void print_internals(pcre *re, FILE *outfile)                 NULL if no data read and EOF reached
187  {  */
 unsigned char *code = ((real_pcre *)re)->code;  
   
 fprintf(outfile, "------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
188    
189          case OP_CRRANGE:  static uschar *
190          case OP_CRMINRANGE:  extend_inputline(FILE *f, uschar *start)
191          min = (code[1] << 8) + code[2];  {
192          max = (code[3] << 8) + code[4];  uschar *here = start;
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
193    
194          default:  for (;;)
195          code--;    {
196          }    int rlen = buffer_size - (here - buffer);
197    
198      if (rlen > 1000)
199        {
200        int dlen;
201        if (fgets((char *)here, rlen,  f) == NULL)
202          return (here == start)? NULL : start;
203        dlen = (int)strlen((char *)here);
204        if (dlen > 0 && here[dlen - 1] == '\n') return start;
205        here += dlen;
206        }
207    
208      else
209        {
210        int new_buffer_size = 2*buffer_size;
211        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
212        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
213        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
214    
215        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
216          {
217          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
218          exit(1);
219        }        }
     break;  
220    
221      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
222        memcpy(new_pbuffer, pbuffer, buffer_size);
223    
224        buffer_size = new_buffer_size;
225    
226        start = new_buffer + (start - buffer);
227        here = new_buffer + (here - buffer);
228    
229      default:      free(buffer);
230      fprintf(outfile, "    %s", OP_names[*code]);      free(dbuffer);
231      break;      free(pbuffer);
232    
233        buffer = new_buffer;
234        dbuffer = new_dbuffer;
235        pbuffer = new_pbuffer;
236      }      }
237      }
238    
239    return NULL;  /* Control never gets here */
240    }
241    
242    
243    
244    code++;  
245    fprintf(outfile, "\n");  
246    
247    
248    /*************************************************
249    *          Read number from string               *
250    *************************************************/
251    
252    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
253    around with conditional compilation, just do the job by hand. It is only used
254    for unpicking arguments, so just keep it simple.
255    
256    Arguments:
257      str           string to be converted
258      endptr        where to put the end pointer
259    
260    Returns:        the value, as an int
261    */
262    
263    static int
264    get_value(unsigned char *str, unsigned char **endptr)
265    {
266    int result = 0;
267    while(*str != 0 && isspace(*str)) str++;
268    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
269    *endptr = str;
270    return(result);
271    }
272    
273    
274    
275    
276    /*************************************************
277    *            Convert UTF-8 string to value       *
278    *************************************************/
279    
280    /* This function takes one or more bytes that represent a UTF-8 character,
281    and returns the value of the character.
282    
283    Argument:
284      utf8bytes   a pointer to the byte vector
285      vptr        a pointer to an int to receive the value
286    
287    Returns:      >  0 => the number of bytes consumed
288                  -6 to 0 => malformed UTF-8 character at offset = (-return)
289    */
290    
291    #if !defined NOUTF8
292    
293    static int
294    utf82ord(unsigned char *utf8bytes, int *vptr)
295    {
296    int c = *utf8bytes++;
297    int d = c;
298    int i, j, s;
299    
300    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
301      {
302      if ((d & 0x80) == 0) break;
303      d <<= 1;
304    }    }
305    
306    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
307    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
308    
309    /* i now has a value in the range 1-5 */
310    
311    s = 6*i;
312    d = (c & utf8_table3[i]) << s;
313    
314    for (j = 0; j < i; j++)
315      {
316      c = *utf8bytes++;
317      if ((c & 0xc0) != 0x80) return -(j+1);
318      s -= 6;
319      d |= (c & 0x3f) << s;
320      }
321    
322    /* Check that encoding was the correct unique one */
323    
324    for (j = 0; j < utf8_table1_size; j++)
325      if (d <= utf8_table1[j]) break;
326    if (j != i) return -(i+1);
327    
328    /* Valid value */
329    
330    *vptr = d;
331    return i+1;
332  }  }
333    
334    #endif
335    
336    
337    
338    /*************************************************
339    *       Convert character value to UTF-8         *
340    *************************************************/
341    
342    /* This function takes an integer value in the range 0 - 0x7fffffff
343    and encodes it as a UTF-8 character in 1 to 6 bytes.
344    
345    Arguments:
346      cvalue     the character value
347      utf8bytes  pointer to buffer for result - at least 6 bytes long
348    
349    Returns:     number of bytes placed in the buffer
350    */
351    
352  /* Character string printing function. */  #if !defined NOUTF8
353    
354  static void pchars(unsigned char *p, int length)  static int
355    ord2utf8(int cvalue, uschar *utf8bytes)
356  {  {
357  int c;  register int i, j;
358    for (i = 0; i < utf8_table1_size; i++)
359      if (cvalue <= utf8_table1[i]) break;
360    utf8bytes += i;
361    for (j = i; j > 0; j--)
362     {
363     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
364     cvalue >>= 6;
365     }
366    *utf8bytes = utf8_table2[i] | cvalue;
367    return i + 1;
368    }
369    
370    #endif
371    
372    
373    
374    /*************************************************
375    *             Print character string             *
376    *************************************************/
377    
378    /* Character string printing function. Must handle UTF-8 strings in utf8
379    mode. Yields number of characters printed. If handed a NULL file, just counts
380    chars without printing. */
381    
382    static int pchars(unsigned char *p, int length, FILE *f)
383    {
384    int c = 0;
385    int yield = 0;
386    
387  while (length-- > 0)  while (length-- > 0)
388    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
389      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
390      if (use_utf8)
391        {
392        int rc = utf82ord(p, &c);
393    
394        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
395          {
396          length -= rc - 1;
397          p += rc;
398          if (PRINTHEX(c))
399            {
400            if (f != NULL) fprintf(f, "%c", c);
401            yield++;
402            }
403          else
404            {
405            int n = 4;
406            if (f != NULL) fprintf(f, "\\x{%02x}", c);
407            yield += (n <= 0x000000ff)? 2 :
408                     (n <= 0x00000fff)? 3 :
409                     (n <= 0x0000ffff)? 4 :
410                     (n <= 0x000fffff)? 5 : 6;
411            }
412          continue;
413          }
414        }
415    #endif
416    
417       /* Not UTF-8, or malformed UTF-8  */
418    
419      c = *p++;
420      if (PRINTHEX(c))
421        {
422        if (f != NULL) fprintf(f, "%c", c);
423        yield++;
424        }
425      else
426        {
427        if (f != NULL) fprintf(f, "\\x%02x", c);
428        yield += 4;
429        }
430      }
431    
432    return yield;
433  }  }
434    
435    
436    
437    /*************************************************
438    *              Callout function                  *
439    *************************************************/
440    
441    /* Called from PCRE as a result of the (?C) item. We print out where we are in
442    the match. Yield zero unless more callouts than the fail count, or the callout
443    data is not zero. */
444    
445    static int callout(pcre_callout_block *cb)
446    {
447    FILE *f = (first_callout | callout_extra)? outfile : NULL;
448    int i, pre_start, post_start, subject_length;
449    
450    if (callout_extra)
451      {
452      fprintf(f, "Callout %d: last capture = %d\n",
453        cb->callout_number, cb->capture_last);
454    
455      for (i = 0; i < cb->capture_top * 2; i += 2)
456        {
457        if (cb->offset_vector[i] < 0)
458          fprintf(f, "%2d: <unset>\n", i/2);
459        else
460          {
461          fprintf(f, "%2d: ", i/2);
462          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
463            cb->offset_vector[i+1] - cb->offset_vector[i], f);
464          fprintf(f, "\n");
465          }
466        }
467      }
468    
469    /* Re-print the subject in canonical form, the first time or if giving full
470    details. On subsequent calls in the same match, we use pchars just to find the
471    printed lengths of the substrings. */
472    
473    if (f != NULL) fprintf(f, "--->");
474    
475    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
476    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
477      cb->current_position - cb->start_match, f);
478    
479    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
480    
481    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
482      cb->subject_length - cb->current_position, f);
483    
484    if (f != NULL) fprintf(f, "\n");
485    
486    /* Always print appropriate indicators, with callout number if not already
487    shown. For automatic callouts, show the pattern offset. */
488    
489    if (cb->callout_number == 255)
490      {
491      fprintf(outfile, "%+3d ", cb->pattern_position);
492      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
493      }
494    else
495      {
496      if (callout_extra) fprintf(outfile, "    ");
497        else fprintf(outfile, "%3d ", cb->callout_number);
498      }
499    
500    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
501    fprintf(outfile, "^");
502    
503    if (post_start > 0)
504      {
505      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
506      fprintf(outfile, "^");
507      }
508    
509    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
510      fprintf(outfile, " ");
511    
512    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
513      pbuffer + cb->pattern_position);
514    
515    fprintf(outfile, "\n");
516    first_callout = 0;
517    
518    if (cb->callout_data != NULL)
519      {
520      int callout_data = *((int *)(cb->callout_data));
521      if (callout_data != 0)
522        {
523        fprintf(outfile, "Callout data = %d\n", callout_data);
524        return callout_data;
525        }
526      }
527    
528    return (cb->callout_number != callout_fail_id)? 0 :
529           (++callout_count >= callout_fail_count)? 1 : 0;
530    }
531    
532    
533    /*************************************************
534    *            Local malloc functions              *
535    *************************************************/
536    
537  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
538  compiled re. */  compiled re. */
539    
540  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
541  {  {
542  if (log_store)  void *block = malloc(size);
543    fprintf(outfile, "Memory allocation request: %d (code space %d)\n",  gotten_store = size;
544      (int)size, (int)size - offsetof(real_pcre, code[0]));  if (show_malloc)
545  return malloc(size);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
546    return block;
547    }
548    
549    static void new_free(void *block)
550    {
551    if (show_malloc)
552      fprintf(outfile, "free             %p\n", block);
553    free(block);
554    }
555    
556    
557    /* For recursion malloc/free, to test stacking calls */
558    
559    static void *stack_malloc(size_t size)
560    {
561    void *block = malloc(size);
562    if (show_malloc)
563      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
564    return block;
565    }
566    
567    static void stack_free(void *block)
568    {
569    if (show_malloc)
570      fprintf(outfile, "stack_free       %p\n", block);
571    free(block);
572    }
573    
574    
575    /*************************************************
576    *          Call pcre_fullinfo()                  *
577    *************************************************/
578    
579    /* Get one piece of information from the pcre_fullinfo() function */
580    
581    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
582    {
583    int rc;
584    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
585      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
586  }  }
587    
588    
589    
590    /*************************************************
591    *         Byte flipping function                 *
592    *************************************************/
593    
594    static unsigned long int
595    byteflip(unsigned long int value, int n)
596    {
597    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
598    return ((value & 0x000000ff) << 24) |
599           ((value & 0x0000ff00) <<  8) |
600           ((value & 0x00ff0000) >>  8) |
601           ((value & 0xff000000) >> 24);
602    }
603    
604    
605    
606    
607    /*************************************************
608    *        Check match or recursion limit          *
609    *************************************************/
610    
611    static int
612    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
613      int start_offset, int options, int *use_offsets, int use_size_offsets,
614      int flag, unsigned long int *limit, int errnumber, const char *msg)
615    {
616    int count;
617    int min = 0;
618    int mid = 64;
619    int max = -1;
620    
621    extra->flags |= flag;
622    
623    for (;;)
624      {
625      *limit = mid;
626    
627      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
628        use_offsets, use_size_offsets);
629    
630      if (count == errnumber)
631        {
632        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
633        min = mid;
634        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
635        }
636    
637      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
638                             count == PCRE_ERROR_PARTIAL)
639        {
640        if (mid == min + 1)
641          {
642          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
643          break;
644          }
645        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
646        max = mid;
647        mid = (min + mid)/2;
648        }
649      else break;    /* Some other error */
650      }
651    
652    extra->flags &= ~flag;
653    return count;
654    }
655    
656    
657    
658    /*************************************************
659    *         Check newline indicator                *
660    *************************************************/
661    
662    /* This is used both at compile and run-time to check for <xxx> escapes, where
663    xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
664    
665    Arguments:
666      p           points after the leading '<'
667      f           file for error message
668    
669    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
670    */
671    
672    static int
673    check_newline(uschar *p, FILE *f)
674    {
675    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
676    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
677    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
678    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
679    fprintf(f, "Unknown newline type at: <%s\n", p);
680    return 0;
681    }
682    
683    
684    
685    /*************************************************
686    *             Usage function                     *
687    *************************************************/
688    
/* Write the command-line summary to stdout. The -dfa and -p lines are only
included when the corresponding support has not been compiled out (NODFA and
NOPOSIX respectively). There are no arguments and no return value. */

static void
usage(void)
{
FILE *out = stdout;

fputs("Usage:     pcretest [options] [<input> [<output>]]\n", out);
fputs("  -b       show compiled code (bytecode)\n", out);
fputs("  -C       show PCRE compile-time options and exit\n", out);
fputs("  -d       debug: show compiled code and information (-b and -i)\n", out);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", out);
#endif
fputs("  -help    show usage information\n", out);
fputs("  -i       show information about compiled patterns\n", out);
fputs("  -m       output memory used information\n", out);
fputs("  -o <n>   set size of offsets vector to <n>\n", out);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", out);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n", out);
fputs("  -S <n>   set stack size to <n> megabytes\n", out);
fputs("  -s       output store (memory) used information\n", out);
fputs("  -t       time compilation and execution\n", out);
fputs("  -t <n>   time compilation and execution, repeating <n> times\n", out);
fputs("  -tm      time execution (matching) only\n", out);
fputs("  -tm <n>  time execution (matching) only, repeating <n> times\n", out);
}
714    
715    
716    
717    /*************************************************
718    *                Main Program                    *
719    *************************************************/
720    
721  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
722  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
723  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 293  int options = 0; Line 729  int options = 0;
729  int study_options = 0;  int study_options = 0;
730  int op = 1;  int op = 1;
731  int timeit = 0;  int timeit = 0;
732    int timeitm = 0;
733  int showinfo = 0;  int showinfo = 0;
734  int showstore = 0;  int showstore = 0;
735    int quiet = 0;
736    int size_offsets = 45;
737    int size_offsets_max;
738    int *offsets = NULL;
739    #if !defined NOPOSIX
740  int posix = 0;  int posix = 0;
741    #endif
742  int debug = 0;  int debug = 0;
743  int done = 0;  int done = 0;
744  unsigned char buffer[30000];  int all_use_dfa = 0;
745  unsigned char dbuffer[1024];  int yield = 0;
746    int stack_size;
747    
748    /* These vectors store, end-to-end, a list of captured substring names. Assume
749    that 1024 is plenty long enough for the few names we'll be testing. */
750    
751    uschar copynames[1024];
752    uschar getnames[1024];
753    
754  /* Static so that new_malloc can use it. */  uschar *copynamesptr;
755    uschar *getnamesptr;
756    
757    /* Get buffers from malloc() so that Electric Fence will check their misuse
758    when I am debugging. They grow automatically when very long lines are read. */
759    
760    buffer = (unsigned char *)malloc(buffer_size);
761    dbuffer = (unsigned char *)malloc(buffer_size);
762    pbuffer = (unsigned char *)malloc(buffer_size);
763    
764    /* The outfile variable is static so that new_malloc can use it. */
765    
766  outfile = stdout;  outfile = stdout;
767    
768    /* The following  _setmode() stuff is some Windows magic that tells its runtime
769    library to translate CRLF into a single LF character. At least, that's what
770    I've been told: never having used Windows I take this all on trust. Originally
771    it set 0x8000, but then I was advised that _O_BINARY was better. */
772    
773    #if defined(_WIN32) || defined(WIN32)
774    _setmode( _fileno( stdout ), _O_BINARY );
775    #endif
776    
777  /* Scan options */  /* Scan options */
778    
779  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
780    {    {
781      unsigned char *endptr;
782    
783    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
784      showstore = 1;      showstore = 1;
785    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
786      else if (strcmp(argv[op], "-b") == 0) debug = 1;
787    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
788    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
789    #if !defined NODFA
790      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
791    #endif
792      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
793          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
794            *endptr == 0))
795        {
796        op++;
797        argc--;
798        }
799      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
800        {
801        int both = argv[op][2] == 0;
802        int temp;
803        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
804                         *endptr == 0))
805          {
806          timeitm = temp;
807          op++;
808          argc--;
809          }
810        else timeitm = LOOPREPEAT;
811        if (both) timeit = timeitm;
812        }
813      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
814          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
815            *endptr == 0))
816        {
817    #if defined(_WIN32) || defined(WIN32)
818        printf("PCRE: -S not supported on this OS\n");
819        exit(1);
820    #else
821        int rc;
822        struct rlimit rlim;
823        getrlimit(RLIMIT_STACK, &rlim);
824        rlim.rlim_cur = stack_size * 1024 * 1024;
825        rc = setrlimit(RLIMIT_STACK, &rlim);
826        if (rc != 0)
827          {
828        printf("PCRE: setrlimit() failed with error %d\n", rc);
829        exit(1);
830          }
831        op++;
832        argc--;
833    #endif
834        }
835    #if !defined NOPOSIX
836    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
837    #endif
838      else if (strcmp(argv[op], "-C") == 0)
839        {
840        int rc;
841        printf("PCRE version %s\n", pcre_version());
842        printf("Compiled with\n");
843        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
844        printf("  %sUTF-8 support\n", rc? "" : "No ");
845        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
846        printf("  %sUnicode properties support\n", rc? "" : "No ");
847        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
848        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
849          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
850          (rc == -1)? "ANY" : "???");
851        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
852        printf("  Internal link size = %d\n", rc);
853        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
854        printf("  POSIX malloc threshold = %d\n", rc);
855        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
856        printf("  Default match limit = %d\n", rc);
857        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
858        printf("  Default recursion depth limit = %d\n", rc);
859        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
860        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
861        exit(0);
862        }
863      else if (strcmp(argv[op], "-help") == 0 ||
864               strcmp(argv[op], "--help") == 0)
865        {
866        usage();
867        goto EXIT;
868        }
869    else    else
870      {      {
871      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
872      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
873      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
874             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
875      }      }
876    op++;    op++;
877    argc--;    argc--;
878    }    }
879    
880    /* Get the store for the offsets vector, and remember what it was */
881    
882    size_offsets_max = size_offsets;
883    offsets = (int *)malloc(size_offsets_max * sizeof(int));
884    if (offsets == NULL)
885      {
886      printf("** Failed to get %d bytes of memory for offsets vector\n",
887        size_offsets_max * sizeof(int));
888      yield = 1;
889      goto EXIT;
890      }
891    
892  /* Sort out the input and output files */  /* Sort out the input and output files */
893    
894  if (argc > 1)  if (argc > 1)
895    {    {
896    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
897    if (infile == NULL)    if (infile == NULL)
898      {      {
899      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
900      return 1;      yield = 1;
901        goto EXIT;
902      }      }
903    }    }
904    
905  if (argc > 2)  if (argc > 2)
906    {    {
907    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
908    if (outfile == NULL)    if (outfile == NULL)
909      {      {
910      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
911      return 1;      yield = 1;
912        goto EXIT;
913      }      }
914    }    }
915    
916  /* Set alternative malloc function */  /* Set alternative malloc function */
917    
918  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
919    pcre_free = new_free;
920    pcre_stack_malloc = stack_malloc;
921    pcre_stack_free = stack_free;
922    
923  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
924    
925  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
926    
927  /* Main loop */  /* Main loop */
928    
# Line 366  while (!done) Line 930  while (!done)
930    {    {
931    pcre *re = NULL;    pcre *re = NULL;
932    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
933    
934    #if !defined NOPOSIX  /* There are still compilers that require no indent */
935    regex_t preg;    regex_t preg;
936      int do_posix = 0;
937    #endif
938    
939    const char *error;    const char *error;
940    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
941    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
942      const unsigned char *tables = NULL;
943      unsigned long int true_size, true_study_size = 0;
944      size_t size, regex_gotten_store;
945    int do_study = 0;    int do_study = 0;
946    int do_debug = debug;    int do_debug = debug;
947      int do_G = 0;
948      int do_g = 0;
949    int do_showinfo = showinfo;    int do_showinfo = showinfo;
950    int do_posix = 0;    int do_showrest = 0;
951    int erroroffset, len, delimiter;    int do_flip = 0;
952      int erroroffset, len, delimiter, poffset;
953    
954      use_utf8 = 0;
955    
956    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
957    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
958    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
959      fflush(outfile);
960    
961    p = buffer;    p = buffer;
962    while (isspace(*p)) p++;    while (isspace(*p)) p++;
963    if (*p == 0) continue;    if (*p == 0) continue;
964    
965    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
966    complete, read more. */  
967      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
968        {
969        unsigned long int magic, get_options;
970        uschar sbuf[8];
971        FILE *f;
972    
973        p++;
974        pp = p + (int)strlen((char *)p);
975        while (isspace(pp[-1])) pp--;
976        *pp = 0;
977    
978        f = fopen((char *)p, "rb");
979        if (f == NULL)
980          {
981          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
982          continue;
983          }
984    
985        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
986    
987        true_size =
988          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
989        true_study_size =
990          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
991    
992        re = (real_pcre *)new_malloc(true_size);
993        regex_gotten_store = gotten_store;
994    
995        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
996    
997        magic = ((real_pcre *)re)->magic_number;
998        if (magic != MAGIC_NUMBER)
999          {
1000          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1001            {
1002            do_flip = 1;
1003            }
1004          else
1005            {
1006            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1007            fclose(f);
1008            continue;
1009            }
1010          }
1011    
1012        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1013          do_flip? " (byte-inverted)" : "", p);
1014    
1015        /* Need to know if UTF-8 for printing data strings */
1016    
1017        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1018        use_utf8 = (get_options & PCRE_UTF8) != 0;
1019    
1020        /* Now see if there is any following study data */
1021    
1022        if (true_study_size != 0)
1023          {
1024          pcre_study_data *psd;
1025    
1026          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1027          extra->flags = PCRE_EXTRA_STUDY_DATA;
1028    
1029          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1030          extra->study_data = psd;
1031    
1032          if (fread(psd, 1, true_study_size, f) != true_study_size)
1033            {
1034            FAIL_READ:
1035            fprintf(outfile, "Failed to read data from %s\n", p);
1036            if (extra != NULL) new_free(extra);
1037            if (re != NULL) new_free(re);
1038            fclose(f);
1039            continue;
1040            }
1041          fprintf(outfile, "Study data loaded from %s\n", p);
1042          do_study = 1;     /* To get the data output if requested */
1043          }
1044        else fprintf(outfile, "No study data\n");
1045    
1046        fclose(f);
1047        goto SHOW_INFO;
1048        }
1049    
1050      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1051      the pattern; if it isn't complete, read more. */
1052    
1053    delimiter = *p++;    delimiter = *p++;
1054    
# Line 396  while (!done) Line 1059  while (!done)
1059      }      }
1060    
1061    pp = p;    pp = p;
1062      poffset = p - buffer;
1063    
1064    for(;;)    for(;;)
1065      {      {
# Line 406  while (!done) Line 1070  while (!done)
1070        pp++;        pp++;
1071        }        }
1072      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1073      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1074      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1075        {        {
1076        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1077        done = 1;        done = 1;
# Line 424  while (!done) Line 1080  while (!done)
1080      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1081      }      }
1082    
1083      /* The buffer may have moved while being extended; reset the start of data
1084      pointer to the correct relative point in the buffer. */
1085    
1086      p = buffer + poffset;
1087    
1088    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1089    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1090    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1091    
1092    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1093    
1094    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1095      for callouts. */
1096    
1097    *pp++ = 0;    *pp++ = 0;
1098      strcpy((char *)pbuffer, (char *)p);
1099    
1100    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1101    
# Line 444  while (!done) Line 1107  while (!done)
1107      {      {
1108      switch (*pp++)      switch (*pp++)
1109        {        {
1110          case 'f': options |= PCRE_FIRSTLINE; break;
1111          case 'g': do_g = 1; break;
1112        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1113        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1114        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1115        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1116    
1117          case '+': do_showrest = 1; break;
1118        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1119          case 'B': do_debug = 1; break;
1120          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1121        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1122        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1123          case 'F': do_flip = 1; break;
1124          case 'G': do_G = 1; break;
1125        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1126          case 'J': options |= PCRE_DUPNAMES; break;
1127        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1128          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1129    
1130    #if !defined NOPOSIX
1131        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1132    #endif
1133    
1134        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1135        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1136        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1137          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1138          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1139    
1140        case 'L':        case 'L':
1141        ppp = pp;        ppp = pp;
1142        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1143          /* The '0' test is just in case this is an unterminated line. */
1144          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1145        *ppp = 0;        *ppp = 0;
1146        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1147          {          {
1148          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1149          goto SKIP_DATA;          goto SKIP_DATA;
1150          }          }
1151          locale_set = 1;
1152        tables = pcre_maketables();        tables = pcre_maketables();
1153        pp = ppp;        pp = ppp;
1154        break;        break;
1155    
1156        case '\n': case ' ': break;        case '>':
1157          to_file = pp;
1158          while (*pp != 0) pp++;
1159          while (isspace(pp[-1])) pp--;
1160          *pp = 0;
1161          break;
1162    
1163          case '<':
1164            {
1165            int x = check_newline(pp, outfile);
1166            if (x == 0) goto SKIP_DATA;
1167            options |= x;
1168            while (*pp++ != '>');
1169            }
1170          break;
1171    
1172          case '\r':                      /* So that it works in Windows */
1173          case '\n':
1174          case ' ':
1175          break;
1176    
1177        default:        default:
1178        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1179        goto SKIP_DATA;        goto SKIP_DATA;
# Line 483  while (!done) Line 1184  while (!done)
1184    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1185    local character tables. */    local character tables. */
1186    
1187    #if !defined NOPOSIX
1188    if (posix || do_posix)    if (posix || do_posix)
1189      {      {
1190      int rc;      int rc;
1191      int cflags = 0;      int cflags = 0;
1192    
1193      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1194      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1195        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1196        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1197        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1198    
1199      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1200    
1201      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 496  while (!done) Line 1203  while (!done)
1203    
1204      if (rc != 0)      if (rc != 0)
1205        {        {
1206        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1207        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1208        goto SKIP_DATA;        goto SKIP_DATA;
1209        }        }
# Line 505  while (!done) Line 1212  while (!done)
1212    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1213    
1214    else    else
1215    #endif  /* !defined NOPOSIX */
1216    
1217      {      {
1218      if (timeit)      if (timeit > 0)
1219        {        {
1220        register int i;        register int i;
1221        clock_t time_taken;        clock_t time_taken;
1222        clock_t start_time = clock();        clock_t start_time = clock();
1223        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1224          {          {
1225          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1226          if (re != NULL) free(re);          if (re != NULL) free(re);
1227          }          }
1228        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1229        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1230          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1231          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1232        }        }
1233    
1234      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 535  while (!done) Line 1244  while (!done)
1244          {          {
1245          for (;;)          for (;;)
1246            {            {
1247            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1248              {              {
1249              done = 1;              done = 1;
1250              goto CONTINUE;              goto CONTINUE;
# Line 549  while (!done) Line 1258  while (!done)
1258        goto CONTINUE;        goto CONTINUE;
1259        }        }
1260    
1261      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
1262        info-returning functions. The old one has a limited interface and
1263        returns only limited data. Check that it agrees with the newer one. */
1264    
1265        if (log_store)
1266          fprintf(outfile, "Memory allocation (code space): %d\n",
1267            (int)(gotten_store -
1268                  sizeof(real_pcre) -
1269                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1270    
1271        /* Extract the size for possible writing before possibly flipping it,
1272        and remember the store that was got. */
1273    
1274        true_size = ((real_pcre *)re)->size;
1275        regex_gotten_store = gotten_store;
1276    
1277        /* If /S was present, study the regexp to generate additional info to
1278        help with the matching. */
1279    
1280        if (do_study)
1281          {
1282          if (timeit > 0)
1283            {
1284            register int i;
1285            clock_t time_taken;
1286            clock_t start_time = clock();
1287            for (i = 0; i < timeit; i++)
1288              extra = pcre_study(re, study_options, &error);
1289            time_taken = clock() - start_time;
1290            if (extra != NULL) free(extra);
1291            fprintf(outfile, "  Study time %.4f milliseconds\n",
1292              (((double)time_taken * 1000.0) / (double)timeit) /
1293                (double)CLOCKS_PER_SEC);
1294            }
1295          extra = pcre_study(re, study_options, &error);
1296          if (error != NULL)
1297            fprintf(outfile, "Failed to study: %s\n", error);
1298          else if (extra != NULL)
1299            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1300          }
1301    
1302        /* If the 'F' option was present, we flip the bytes of all the integer
1303        fields in the regex data block and the study block. This is to make it
1304        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1305        compiled on a different architecture. */
1306    
1307        if (do_flip)
1308          {
1309          real_pcre *rre = (real_pcre *)re;
1310          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1311          rre->size = byteflip(rre->size, sizeof(rre->size));
1312          rre->options = byteflip(rre->options, sizeof(rre->options));
1313          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1314          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1315          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1316          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1317          rre->name_table_offset = byteflip(rre->name_table_offset,
1318            sizeof(rre->name_table_offset));
1319          rre->name_entry_size = byteflip(rre->name_entry_size,
1320            sizeof(rre->name_entry_size));
1321          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1322    
1323          if (extra != NULL)
1324            {
1325            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1326            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1327            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1328            }
1329          }
1330    
1331        /* Extract information from the compiled data if required */
1332    
1333        SHOW_INFO:
1334    
1335        if (do_debug)
1336          {
1337          fprintf(outfile, "------------------------------------------------------------------\n");
1338          pcre_printint(re, outfile);
1339          }
1340    
1341        if (do_showinfo)
1342          {
1343          unsigned long int get_options, all_options;
1344    #if !defined NOINFOCHECK
1345          int old_first_char, old_options, old_count;
1346    #endif
1347          int count, backrefmax, first_char, need_char;
1348          int nameentrysize, namecount;
1349          const uschar *nametable;
1350    
1351          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1352          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1353          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1354          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1355          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1356          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1357          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1358          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1359          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1360    
1361    #if !defined NOINFOCHECK
1362          old_count = pcre_info(re, &old_options, &old_first_char);
1363          if (count < 0) fprintf(outfile,
1364            "Error %d from pcre_info()\n", count);
1365          else
1366            {
1367            if (old_count != count) fprintf(outfile,
1368              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1369                old_count);
1370    
1371            if (old_first_char != first_char) fprintf(outfile,
1372              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1373                first_char, old_first_char);
1374    
1375            if (old_options != (int)get_options) fprintf(outfile,
1376              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1377                get_options, old_options);
1378            }
1379    #endif
1380    
1381          if (size != regex_gotten_store) fprintf(outfile,
1382            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1383            (int)size, (int)regex_gotten_store);
1384    
1385          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1386          if (backrefmax > 0)
1387            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1388    
1389          if (namecount > 0)
1390            {
1391            fprintf(outfile, "Named capturing subpatterns:\n");
1392            while (namecount-- > 0)
1393              {
1394              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1395                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1396                GET2(nametable, 0));
1397              nametable += nameentrysize;
1398              }
1399            }
1400    
1401          /* The NOPARTIAL bit is a private bit in the options, so we have
1402          to fish it out via our back door */
1403    
1404          all_options = ((real_pcre *)re)->options;
1405          if (do_flip)
1406            {
1407            all_options = byteflip(all_options, sizeof(all_options));
1408             }
1409    
1410          if ((all_options & PCRE_NOPARTIAL) != 0)
1411            fprintf(outfile, "Partial matching not supported\n");
1412    
1413          if (get_options == 0) fprintf(outfile, "No options\n");
1414            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1415              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1416              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1417              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1418              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1419              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1420              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1421              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1422              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1423              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1424              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1425              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1426              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1427              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1428    
1429          switch (get_options & PCRE_NEWLINE_BITS)
1430            {
1431            case PCRE_NEWLINE_CR:
1432            fprintf(outfile, "Forced newline sequence: CR\n");
1433            break;
1434    
1435            case PCRE_NEWLINE_LF:
1436            fprintf(outfile, "Forced newline sequence: LF\n");
1437            break;
1438    
1439            case PCRE_NEWLINE_CRLF:
1440            fprintf(outfile, "Forced newline sequence: CRLF\n");
1441            break;
1442    
1443      if (do_showinfo)          case PCRE_NEWLINE_ANY:
1444        {          fprintf(outfile, "Forced newline sequence: ANY\n");
1445        int first_char, count;          break;
1446    
1447        if (do_debug) print_internals(re, outfile);          default:
1448            break;
1449            }
1450    
1451        count = pcre_info(re, &options, &first_char);        if (first_char == -1)
1452        if (count < 0) fprintf(outfile,          {
1453          "Error %d while reading info\n", count);          fprintf(outfile, "First char at start or follows newline\n");
1454            }
1455          else if (first_char < 0)
1456            {
1457            fprintf(outfile, "No first char\n");
1458            }
1459        else        else
1460          {          {
1461          fprintf(outfile, "Identifying subpattern count = %d\n", count);          int ch = first_char & 255;
1462          if (options == 0) fprintf(outfile, "No options\n");          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1463            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",            "" : " (caseless)";
1464              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          if (PRINTHEX(ch))
1465              ((options & PCRE_CASELESS) != 0)? " caseless" : "",            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1466              ((options & PCRE_EXTENDED) != 0)? " extended" : "",          else
1467              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1468              ((options & PCRE_DOTALL) != 0)? " dotall" : "",          }
1469              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
1470              ((options & PCRE_EXTRA) != 0)? " extra" : "",        if (need_char < 0)
1471              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");          {
1472          if (first_char == -1)          fprintf(outfile, "No need char\n");
1473            {          }
1474            fprintf(outfile, "First char at start or follows \\n\n");        else
1475            }          {
1476          else if (first_char < 0)          int ch = need_char & 255;
1477            {          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1478            fprintf(outfile, "No first char\n");            "" : " (caseless)";
1479            }          if (PRINTHEX(ch))
1480              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1481            else
1482              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1483            }
1484    
1485          /* Don't output study size; at present it is in any case a fixed
1486          value, but it varies, depending on the computer architecture, and
1487          so messes up the test suite. (And with the /F option, it might be
1488          flipped.) */
1489    
1490          if (do_study)
1491            {
1492            if (extra == NULL)
1493              fprintf(outfile, "Study returned NULL\n");
1494          else          else
1495            {            {
1496            if (isprint(first_char))            uschar *start_bits = NULL;
1497              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1498    
1499              if (start_bits == NULL)
1500                fprintf(outfile, "No starting byte set\n");
1501            else            else
1502              fprintf(outfile, "First char = %d\n", first_char);              {
1503                int i;
1504                int c = 24;
1505                fprintf(outfile, "Starting byte set: ");
1506                for (i = 0; i < 256; i++)
1507                  {
1508                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1509                    {
1510                    if (c > 75)
1511                      {
1512                      fprintf(outfile, "\n  ");
1513                      c = 2;
1514                      }
1515                    if (PRINTHEX(i) && i != ' ')
1516                      {
1517                      fprintf(outfile, "%c ", i);
1518                      c += 2;
1519                      }
1520                    else
1521                      {
1522                      fprintf(outfile, "\\x%02x ", i);
1523                      c += 5;
1524                      }
1525                    }
1526                  }
1527                fprintf(outfile, "\n");
1528                }
1529            }            }
1530          }          }
1531        }        }
1532    
1533      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1534      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1535        the study length, in big-endian order. */
1536    
1537      if (do_study)      if (to_file != NULL)
1538        {        {
1539        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1540          if (f == NULL)
1541          {          {
1542          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
1543          }          }
1544          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (do_showinfo)  
1545          {          {
1546          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1547          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (true_size >> 24)  & 255;
1548            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1549            sbuf[2] = (true_size >>  8)  & 255;
1550            sbuf[3] = (true_size)  & 255;
1551    
1552            sbuf[4] = (true_study_size >> 24)  & 255;
1553            sbuf[5] = (true_study_size >> 16)  & 255;
1554            sbuf[6] = (true_study_size >>  8)  & 255;
1555            sbuf[7] = (true_study_size)  & 255;
1556    
1557            if (fwrite(sbuf, 1, 8, f) < 8 ||
1558                fwrite(re, 1, true_size, f) < true_size)
1559              {
1560              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1561              }
1562          else          else
1563            {            {
1564            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1565            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1566              {              {
1567              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1568                    true_study_size)
1569                {                {
1570                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1571                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1572                }                }
1573                else fprintf(outfile, "Study data written to %s\n", to_file);
1574    
1575              }              }
           fprintf(outfile, "\n");  
1576            }            }
1577            fclose(f);
1578          }          }
1579    
1580          new_free(re);
1581          if (extra != NULL) new_free(extra);
1582          if (tables != NULL) new_free((void *)tables);
1583          continue;  /* With next regex */
1584        }        }
1585      }      }        /* End of non-POSIX compile */
1586    
1587    /* Read data lines and test them */    /* Read data lines and test them */
1588    
1589    for (;;)    for (;;)
1590      {      {
1591      unsigned char *q;      uschar *q;
1592        uschar *bptr = dbuffer;
1593        int *use_offsets = offsets;
1594        int use_size_offsets = size_offsets;
1595        int callout_data = 0;
1596        int callout_data_set = 0;
1597      int count, c;      int count, c;
1598      int copystrings = 0;      int copystrings = 0;
1599        int find_match_limit = 0;
1600      int getstrings = 0;      int getstrings = 0;
1601      int getlist = 0;      int getlist = 0;
1602      int offsets[45];      int gmatched = 0;
1603      int size_offsets = sizeof(offsets)/sizeof(int);      int start_offset = 0;
1604        int g_notempty = 0;
1605        int use_dfa = 0;
1606    
1607      options = 0;      options = 0;
1608    
1609      if (infile == stdin) printf("  data> ");      *copynames = 0;
1610      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1611    
1612        copynamesptr = copynames;
1613        getnamesptr = getnames;
1614    
1615        pcre_callout = callout;
1616        first_callout = 1;
1617        callout_extra = 0;
1618        callout_count = 0;
1619        callout_fail_count = 999999;
1620        callout_fail_id = -1;
1621        show_malloc = 0;
1622    
1623        if (extra != NULL) extra->flags &=
1624          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1625    
1626        len = 0;
1627        for (;;)
1628        {        {
1629        done = 1;        if (infile == stdin) printf("data> ");
1630        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1631            {
1632            if (len > 0) break;
1633            done = 1;
1634            goto CONTINUE;
1635            }
1636          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1637          len = (int)strlen((char *)buffer);
1638          if (buffer[len-1] == '\n') break;
1639        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1640    
     len = (int)strlen((char *)buffer);  
1641      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1642      buffer[len] = 0;      buffer[len] = 0;
1643      if (len == 0) break;      if (len == 0) break;
# Line 691  while (!done) Line 1650  while (!done)
1650        {        {
1651        int i = 0;        int i = 0;
1652        int n = 0;        int n = 0;
1653    
1654        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1655          {          {
1656          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 707  while (!done) Line 1667  while (!done)
1667          c -= '0';          c -= '0';
1668          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1669            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1670    
1671    #if !defined NOUTF8
1672            if (use_utf8 && c > 255)
1673              {
1674              unsigned char buff8[8];
1675              int ii, utn;
1676              utn = ord2utf8(c, buff8);
1677              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1678              c = buff8[ii];   /* Last byte */
1679              }
1680    #endif
1681          break;          break;
1682    
1683          case 'x':          case 'x':
1684    
1685            /* Handle \x{..} specially - new Perl thing for utf8 */
1686    
1687    #if !defined NOUTF8
1688            if (*p == '{')
1689              {
1690              unsigned char *pt = p;
1691              c = 0;
1692              while (isxdigit(*(++pt)))
1693                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1694              if (*pt == '}')
1695                {
1696                unsigned char buff8[8];
1697                int ii, utn;
1698                utn = ord2utf8(c, buff8);
1699                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1700                c = buff8[ii];   /* Last byte */
1701                p = pt + 1;
1702                break;
1703                }
1704              /* Not correct form; fall through */
1705              }
1706    #endif
1707    
1708            /* Ordinary \x */
1709    
1710          c = 0;          c = 0;
1711          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1712            {            {
# Line 718  while (!done) Line 1715  while (!done)
1715            }            }
1716          break;          break;
1717    
1718          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1719          p--;          p--;
1720          continue;          continue;
1721    
1722            case '>':
1723            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1724            continue;
1725    
1726          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1727          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1728          continue;          continue;
# Line 731  while (!done) Line 1732  while (!done)
1732          continue;          continue;
1733    
1734          case 'C':          case 'C':
1735          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1736          copystrings |= 1 << n;            {
1737              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1738              copystrings |= 1 << n;
1739              }
1740            else if (isalnum(*p))
1741              {
1742              uschar *npp = copynamesptr;
1743              while (isalnum(*p)) *npp++ = *p++;
1744              *npp++ = 0;
1745              *npp = 0;
1746              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1747              if (n < 0)
1748                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1749              copynamesptr = npp;
1750              }
1751            else if (*p == '+')
1752              {
1753              callout_extra = 1;
1754              p++;
1755              }
1756            else if (*p == '-')
1757              {
1758              pcre_callout = NULL;
1759              p++;
1760              }
1761            else if (*p == '!')
1762              {
1763              callout_fail_id = 0;
1764              p++;
1765              while(isdigit(*p))
1766                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1767              callout_fail_count = 0;
1768              if (*p == '!')
1769                {
1770                p++;
1771                while(isdigit(*p))
1772                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1773                }
1774              }
1775            else if (*p == '*')
1776              {
1777              int sign = 1;
1778              callout_data = 0;
1779              if (*(++p) == '-') { sign = -1; p++; }
1780              while(isdigit(*p))
1781                callout_data = callout_data * 10 + *p++ - '0';
1782              callout_data *= sign;
1783              callout_data_set = 1;
1784              }
1785            continue;
1786    
1787    #if !defined NODFA
1788            case 'D':
1789    #if !defined NOPOSIX
1790            if (posix || do_posix)
1791              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1792            else
1793    #endif
1794              use_dfa = 1;
1795            continue;
1796    
1797            case 'F':
1798            options |= PCRE_DFA_SHORTEST;
1799          continue;          continue;
1800    #endif
1801    
1802          case 'G':          case 'G':
1803          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1804          getstrings |= 1 << n;            {
1805              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1806              getstrings |= 1 << n;
1807              }
1808            else if (isalnum(*p))
1809              {
1810              uschar *npp = getnamesptr;
1811              while (isalnum(*p)) *npp++ = *p++;
1812              *npp++ = 0;
1813              *npp = 0;
1814              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1815              if (n < 0)
1816                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1817              getnamesptr = npp;
1818              }
1819          continue;          continue;
1820    
1821          case 'L':          case 'L':
1822          getlist = 1;          getlist = 1;
1823          continue;          continue;
1824    
1825            case 'M':
1826            find_match_limit = 1;
1827            continue;
1828    
1829            case 'N':
1830            options |= PCRE_NOTEMPTY;
1831            continue;
1832    
1833          case 'O':          case 'O':
1834          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1835          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1836              {
1837              size_offsets_max = n;
1838              free(offsets);
1839              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1840              if (offsets == NULL)
1841                {
1842                printf("** Failed to get %d bytes of memory for offsets vector\n",
1843                  size_offsets_max * sizeof(int));
1844                yield = 1;
1845                goto EXIT;
1846                }
1847              }
1848            use_size_offsets = n;
1849            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1850            continue;
1851    
1852            case 'P':
1853            options |= PCRE_PARTIAL;
1854            continue;
1855    
1856            case 'Q':
1857            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1858            if (extra == NULL)
1859              {
1860              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1861              extra->flags = 0;
1862              }
1863            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1864            extra->match_limit_recursion = n;
1865            continue;
1866    
1867            case 'q':
1868            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1869            if (extra == NULL)
1870              {
1871              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1872              extra->flags = 0;
1873              }
1874            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1875            extra->match_limit = n;
1876            continue;
1877    
1878    #if !defined NODFA
1879            case 'R':
1880            options |= PCRE_DFA_RESTART;
1881            continue;
1882    #endif
1883    
1884            case 'S':
1885            show_malloc = 1;
1886          continue;          continue;
1887    
1888          case 'Z':          case 'Z':
1889          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1890          continue;          continue;
1891    
1892            case '?':
1893            options |= PCRE_NO_UTF8_CHECK;
1894            continue;
1895    
1896            case '<':
1897              {
1898              int x = check_newline(p, outfile);
1899              if (x == 0) goto NEXT_DATA;
1900              options |= x;
1901              while (*p++ != '>');
1902              }
1903            continue;
1904          }          }
1905        *q++ = c;        *q++ = c;
1906        }        }
1907      *q = 0;      *q = 0;
1908      len = q - dbuffer;      len = q - dbuffer;
1909    
1910        if ((all_use_dfa || use_dfa) && find_match_limit)
1911          {
1912          printf("**Match limit not relevant for DFA matching: ignored\n");
1913          find_match_limit = 0;
1914          }
1915    
1916      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1917      support timing. */      support timing or playing with the match limit or callout data. */
1918    
1919    #if !defined NOPOSIX
1920      if (posix || do_posix)      if (posix || do_posix)
1921        {        {
1922        int rc;        int rc;
1923        int eflags = 0;        int eflags = 0;
1924        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1925          if (use_size_offsets > 0)
1926            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1927        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1928        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1929    
1930        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1931    
1932        if (rc != 0)        if (rc != 0)
1933          {          {
1934          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1935          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1936          }          }
1937          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1938                  != 0)
1939            {
1940            fprintf(outfile, "Matched with REG_NOSUB\n");
1941            }
1942        else        else
1943          {          {
1944          size_t i;          size_t i;
1945          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1946            {            {
1947            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1948              {              {
1949              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1950              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1951                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1952              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1953                if (i == 0 && do_showrest)
1954                  {
1955                  fprintf(outfile, " 0+ ");
1956                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1957                    outfile);
1958                  fprintf(outfile, "\n");
1959                  }
1960              }              }
1961            }            }
1962          }          }
1963          free(pmatch);
1964        }        }
1965    
1966      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1967    
1968      else      else
1969    #endif  /* !defined NOPOSIX */
1970    
1971        for (;; gmatched++)    /* Loop for /g or /G */
1972        {        {
1973        if (timeit)        if (timeitm > 0)
1974          {          {
1975          register int i;          register int i;
1976          clock_t time_taken;          clock_t time_taken;
1977          clock_t start_time = clock();          clock_t start_time = clock();
1978          for (i = 0; i < LOOPREPEAT; i++)  
1979            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
1980              size_offsets);          if (all_use_dfa || use_dfa)
1981              {
1982              int workspace[1000];
1983              for (i = 0; i < timeitm; i++)
1984                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1985                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1986                  sizeof(workspace)/sizeof(int));
1987              }
1988            else
1989    #endif
1990    
1991            for (i = 0; i < timeitm; i++)
1992              count = pcre_exec(re, extra, (char *)bptr, len,
1993                start_offset, options | g_notempty, use_offsets, use_size_offsets);
1994    
1995          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1996          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
1997            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
1998            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1999            }
2000    
2001          /* If find_match_limit is set, we want to do repeated matches with
2002          varying limits in order to find the minimum value for the match limit and
2003          for the recursion limit. */
2004    
2005          if (find_match_limit)
2006            {
2007            if (extra == NULL)
2008              {
2009              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2010              extra->flags = 0;
2011              }
2012    
2013            (void)check_match_limit(re, extra, bptr, len, start_offset,
2014              options|g_notempty, use_offsets, use_size_offsets,
2015              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2016              PCRE_ERROR_MATCHLIMIT, "match()");
2017    
2018            count = check_match_limit(re, extra, bptr, len, start_offset,
2019              options|g_notempty, use_offsets, use_size_offsets,
2020              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2021              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2022            }
2023    
2024          /* If callout_data is set, use the interface with additional data */
2025    
2026          else if (callout_data_set)
2027            {
2028            if (extra == NULL)
2029              {
2030              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2031              extra->flags = 0;
2032              }
2033            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2034            extra->callout_data = &callout_data;
2035            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2036              options | g_notempty, use_offsets, use_size_offsets);
2037            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2038          }          }
2039    
2040        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
2041          size_offsets);        value of match_limit. */
2042    
2043    #if !defined NODFA
2044          else if (all_use_dfa || use_dfa)
2045            {
2046            int workspace[1000];
2047            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2048              options | g_notempty, use_offsets, use_size_offsets, workspace,
2049              sizeof(workspace)/sizeof(int));
2050            if (count == 0)
2051              {
2052              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2053              count = use_size_offsets/2;
2054              }
2055            }
2056    #endif
2057    
2058        if (count == 0)        else
2059          {          {
2060          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2061          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2062            if (count == 0)
2063              {
2064              fprintf(outfile, "Matched, but too many substrings\n");
2065              count = use_size_offsets/3;
2066              }
2067          }          }
2068    
2069          /* Matched */
2070    
2071        if (count >= 0)        if (count >= 0)
2072          {          {
2073          int i;          int i, maxcount;
2074    
2075    #if !defined NODFA
2076            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2077    #endif
2078              maxcount = use_size_offsets/3;
2079    
2080            /* This is a check against a lunatic return value. */
2081    
2082            if (count > maxcount)
2083              {
2084              fprintf(outfile,
2085                "** PCRE error: returned count %d is too big for offset size %d\n",
2086                count, use_size_offsets);
2087              count = use_size_offsets/3;
2088              if (do_g || do_G)
2089                {
2090                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2091                do_g = do_G = FALSE;        /* Break g/G loop */
2092                }
2093              }
2094    
2095          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2096            {            {
2097            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2098              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2099            else            else
2100              {              {
2101              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2102              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2103                  use_offsets[i+1] - use_offsets[i], outfile);
2104              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2105                if (i == 0)
2106                  {
2107                  if (do_showrest)
2108                    {
2109                    fprintf(outfile, " 0+ ");
2110                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2111                      outfile);
2112                    fprintf(outfile, "\n");
2113                    }
2114                  }
2115              }              }
2116            }            }
2117    
# Line 839  while (!done) Line 2119  while (!done)
2119            {            {
2120            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2121              {              {
2122              char buffer[16];              char copybuffer[256];
2123              int rc = pcre_copy_substring((char *)dbuffer, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2124                i, buffer, sizeof(buffer));                i, copybuffer, sizeof(copybuffer));
2125              if (rc < 0)              if (rc < 0)
2126                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2127              else              else
2128                fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2129              }              }
2130            }            }
2131    
2132            for (copynamesptr = copynames;
2133                 *copynamesptr != 0;
2134                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2135              {
2136              char copybuffer[256];
2137              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2138                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2139              if (rc < 0)
2140                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2141              else
2142                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2143              }
2144    
2145          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2146            {            {
2147            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2148              {              {
2149              const char *substring;              const char *substring;
2150              int rc = pcre_get_substring((char *)dbuffer, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2151                i, &substring);                i, &substring);
2152              if (rc < 0)              if (rc < 0)
2153                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2154              else              else
2155                {                {
2156                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2157                free((void *)substring);                pcre_free_substring(substring);
2158                }                }
2159              }              }
2160            }            }
2161    
2162            for (getnamesptr = getnames;
2163                 *getnamesptr != 0;
2164                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2165              {
2166              const char *substring;
2167              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2168                count, (char *)getnamesptr, &substring);
2169              if (rc < 0)
2170                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2171              else
2172                {
2173                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2174                pcre_free_substring(substring);
2175                }
2176              }
2177    
2178          if (getlist)          if (getlist)
2179            {            {
2180            const char **stringlist;            const char **stringlist;
2181            int rc = pcre_get_substring_list((char *)dbuffer, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2182              &stringlist);              &stringlist);
2183            if (rc < 0)            if (rc < 0)
2184              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 879  while (!done) Line 2188  while (!done)
2188                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2189              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2190                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
2191              free((void *)stringlist);              /* free((void *)stringlist); */
2192                pcre_free_substring_list(stringlist);
2193              }              }
2194            }            }
2195            }
2196    
2197          /* There was a partial match */
2198    
2199          else if (count == PCRE_ERROR_PARTIAL)
2200            {
2201            fprintf(outfile, "Partial match");
2202    #if !defined NODFA
2203            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2204              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2205                bptr + use_offsets[0]);
2206    #endif
2207            fprintf(outfile, "\n");
2208            break;  /* Out of the /g loop */
2209          }          }
2210    
2211          /* Failed to match. If this is a /g or /G loop and we previously set
2212          g_notempty after a null match, this is not necessarily the end.
2213          We want to advance the start offset, and continue. In the case of UTF-8
2214          matching, the advance must be one character, not one byte. Fudge the
2215          offset values to achieve this. We won't be at the end of the string -
2216          that was checked before setting g_notempty. */
2217    
2218        else        else
2219          {          {
2220          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2221              {
2222              int onechar = 1;
2223              use_offsets[0] = start_offset;
2224              if (use_utf8)
2225                {
2226                while (start_offset + onechar < len)
2227                  {
2228                  int tb = bptr[start_offset+onechar];
2229                  if (tb <= 127) break;
2230                  tb &= 0xc0;
2231                  if (tb != 0 && tb != 0xc0) onechar++;
2232                  }
2233                }
2234              use_offsets[1] = start_offset + onechar;
2235              }
2236            else
2237              {
2238              if (count == PCRE_ERROR_NOMATCH)
2239                {
2240                if (gmatched == 0) fprintf(outfile, "No match\n");
2241                }
2242            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2243              break;  /* Out of the /g loop */
2244              }
2245          }          }
2246        }  
2247      }        /* If not /g or /G we are done */
2248    
2249          if (!do_g && !do_G) break;
2250    
2251          /* If we have matched an empty string, first check to see if we are at
2252          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2253          what Perl's /g option does. This turns out to be rather cunning. First
2254          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2255          same point. If this fails (picked up above) we advance to the next
2256          character. */
2257    
2258          g_notempty = 0;
2259          if (use_offsets[0] == use_offsets[1])
2260            {
2261            if (use_offsets[0] == len) break;
2262            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2263            }
2264    
2265          /* For /g, update the start offset, leaving the rest alone */
2266    
2267          if (do_g) start_offset = use_offsets[1];
2268    
2269          /* For /G, update the pointer and length */
2270    
2271          else
2272            {
2273            bptr += use_offsets[1];
2274            len -= use_offsets[1];
2275            }
2276          }  /* End of loop for /g and /G */
2277    
2278        NEXT_DATA: continue;
2279        }    /* End of loop for data lines */
2280    
2281    CONTINUE:    CONTINUE:
2282    
2283    #if !defined NOPOSIX
2284    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2285    if (re != NULL) free(re);  #endif
2286    if (extra != NULL) free(extra);  
2287      if (re != NULL) new_free(re);
2288      if (extra != NULL) new_free(extra);
2289    if (tables != NULL)    if (tables != NULL)
2290      {      {
2291      free((void *)tables);      new_free((void *)tables);
2292      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2293        locale_set = 0;
2294      }      }
2295    }    }
2296    
2297  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2298  return 0;  
2299    EXIT:
2300    
2301    if (infile != NULL && infile != stdin) fclose(infile);
2302    if (outfile != NULL && outfile != stdout) fclose(outfile);
2303    
2304    free(buffer);
2305    free(dbuffer);
2306    free(pbuffer);
2307    free(offsets);
2308    
2309    return yield;
2310  }  }
2311    
2312  /* End */  /* End of pcretest.c */

Legend:
Removed from v.31  
changed lines
  Added in v.107

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12