/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC revision 169 by ph10, Mon Jun 4 10:49:21 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
46    
47    
48    /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
69    
70    /* We have to include pcre_internal.h because we need the internal info for
71    displaying the results of pcre_study() and we also need to know about the
72    internal macros, structures, and other internal data values; pcretest has
73    "inside information" compared to a program that strictly follows the PCRE API.
74    
75    Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80    #include "pcre_internal.h"
81    
82    /* We need access to the data tables that PCRE uses. So as not to have to keep
83    two copies, we include the source file here, changing the names of the external
84    symbols to prevent clashes. */
85    
86    #define _pcre_utf8_table1      utf8_table1
87    #define _pcre_utf8_table1_size utf8_table1_size
88    #define _pcre_utf8_table2      utf8_table2
89    #define _pcre_utf8_table3      utf8_table3
90    #define _pcre_utf8_table4      utf8_table4
91    #define _pcre_utt              utt
92    #define _pcre_utt_size         utt_size
93    #define _pcre_OP_lengths       OP_lengths
94    
95  /* Use the internal info for displaying the results of pcre_study(). */  #include "pcre_tables.c"
96    
97  #include "internal.h"  /* We also need the pcre_printint() function for printing out compiled
98    patterns. This function is in a separate file so that it can be included in
99    pcre_compile.c when that module is compiled with debugging enabled.
100    
101    The definition of the macro PRINTABLE, which determines whether to print an
102    output character as-is or as a hex value when showing compiled patterns, is
103    contained in this file. We use it here also, in cases when the locale has not
104    been explicitly changed, so as to get consistent output from systems that
105    differ in their output from isprint() even in the "C" locale. */
106    
107    #include "pcre_printint.src"
108    
109    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110    
111    
112    /* It is possible to compile this test program without including support for
113    testing the POSIX interface, though this is not available via the standard
114    Makefile. */
115    
116    #if !defined NOPOSIX
117  #include "pcreposix.h"  #include "pcreposix.h"
118    #endif
119    
120    /* It is also possible, for the benefit of the version currently imported into
121    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122    interface to the DFA matcher (NODFA), and without the doublecheck of the old
123    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124    UTF8 support if PCRE is built without it. */
125    
126    #ifndef SUPPORT_UTF8
127    #ifndef NOUTF8
128    #define NOUTF8
129    #endif
130    #endif
131    
132    
133    /* Other parameters */
134    
135  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
136  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 140 
140  #endif  #endif
141  #endif  #endif
142    
143  #define LOOPREPEAT 10000  /* This is the default loop count for timing. */
144    
145    #define LOOPREPEAT 500000
146    
147    /* Static variables */
148    
149  static FILE *outfile;  static FILE *outfile;
150  static int log_store = 0;  static int log_store = 0;
151    static int callout_count;
152    static int callout_extra;
153    static int callout_fail_count;
154    static int callout_fail_id;
155    static int first_callout;
156    static int locale_set = 0;
157    static int show_malloc;
158    static int use_utf8;
159    static size_t gotten_store;
160    
161    /* The buffers grow automatically if very long input lines are encountered. */
162    
163    static int buffer_size = 50000;
164    static uschar *buffer = NULL;
165    static uschar *dbuffer = NULL;
166    static uschar *pbuffer = NULL;
167    
168    
169    
170  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
171  code as contained in pcre.c under the DEBUG macro. */  *        Read or extend an input line            *
172    *************************************************/
173    
174  static const char *OP_names[] = {  /* Input lines are read into buffer, but both patterns and data lines can be
175    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  continued over multiple input lines. In addition, if the buffer fills up, we
176    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  want to automatically expand it so as to be able to handle extremely large
177    "Opt", "^", "$", "Any", "chars", "not",  lines that are needed for certain stress tests. When the input buffer is
178    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  expanded, the other two buffers must also be expanded likewise, and the
179    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  contents of pbuffer, which are a copy of the input for callouts, must be
180    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  preserved (for when expansion happens for a data line). This is not the most
181    "*", "*?", "+", "+?", "?", "??", "{", "{",  optimal way of handling this, but hey, this is just a test program!
182    "class", "Ref",  
183    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  Arguments:
184    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    f            the file to read
185    "Brazero", "Braminzero", "Bra"    start        where in buffer to start (this *must* be within buffer)
186  };  
187    Returns:       pointer to the start of new data
188                   could be a copy of start, or could be moved
189  static void print_internals(pcre *re, FILE *outfile)                 NULL if no data read and EOF reached
190  {  */
 unsigned char *code = ((real_pcre *)re)->code;  
   
 fprintf(outfile, "------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
191    
192          case OP_CRRANGE:  static uschar *
193          case OP_CRMINRANGE:  extend_inputline(FILE *f, uschar *start)
194          min = (code[1] << 8) + code[2];  {
195          max = (code[3] << 8) + code[4];  uschar *here = start;
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
196    
197          default:  for (;;)
198          code--;    {
199          }    int rlen = buffer_size - (here - buffer);
200    
201      if (rlen > 1000)
202        {
203        int dlen;
204        if (fgets((char *)here, rlen,  f) == NULL)
205          return (here == start)? NULL : start;
206        dlen = (int)strlen((char *)here);
207        if (dlen > 0 && here[dlen - 1] == '\n') return start;
208        here += dlen;
209        }
210    
211      else
212        {
213        int new_buffer_size = 2*buffer_size;
214        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217    
218        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219          {
220          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221          exit(1);
222        }        }
     break;  
223    
224      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
225        memcpy(new_pbuffer, pbuffer, buffer_size);
226    
227        buffer_size = new_buffer_size;
228    
229        start = new_buffer + (start - buffer);
230        here = new_buffer + (here - buffer);
231    
232      default:      free(buffer);
233      fprintf(outfile, "    %s", OP_names[*code]);      free(dbuffer);
234      break;      free(pbuffer);
235    
236        buffer = new_buffer;
237        dbuffer = new_dbuffer;
238        pbuffer = new_pbuffer;
239      }      }
240      }
241    
242    return NULL;  /* Control never gets here */
243    }
244    
245    
246    
247    
248    
249    
250    code++;  
251    fprintf(outfile, "\n");  /*************************************************
252    *          Read number from string               *
253    *************************************************/
254    
255    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256    around with conditional compilation, just do the job by hand. It is only used
257    for unpicking arguments, so just keep it simple.
258    
259    Arguments:
260      str           string to be converted
261      endptr        where to put the end pointer
262    
263    Returns:        the value as an int
264    */
265    
/* Skips leading white space, then converts a run of decimal digits. All the
digits are always consumed, so *endptr is left pointing at the first character
after the number. A value too large for an int yields INT_MAX instead of
overflowing (signed overflow is undefined behaviour). A string with no digits
yields zero. */

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (INT_MAX - digit) / 10)
    result = INT_MAX;                 /* saturate; keep eating digits */
  else
    result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
275    
276    
277    
278    
279    /*************************************************
280    *            Convert UTF-8 string to value       *
281    *************************************************/
282    
283    /* This function takes one or more bytes that represents a UTF-8 character,
284    and returns the value of the character.
285    
286    Argument:
287      utf8bytes   a pointer to the byte vector
288      vptr        a pointer to an int to receive the value
289    
290    Returns:      >  0 => the number of bytes consumed
291                  -6 to 0 => malformed UTF-8 character at offset = (-return)
292    */
293    
294    #if !defined NOUTF8
295    
296    static int
297    utf82ord(unsigned char *utf8bytes, int *vptr)
298    {
299    int c = *utf8bytes++;
300    int d = c;
301    int i, j, s;
302    
303    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
304      {
305      if ((d & 0x80) == 0) break;
306      d <<= 1;
307      }
308    
309    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
310    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
311    
312    /* i now has a value in the range 1-5 */
313    
314    s = 6*i;
315    d = (c & utf8_table3[i]) << s;
316    
317    for (j = 0; j < i; j++)
318      {
319      c = *utf8bytes++;
320      if ((c & 0xc0) != 0x80) return -(j+1);
321      s -= 6;
322      d |= (c & 0x3f) << s;
323    }    }
324    
325    /* Check that encoding was the correct unique one */
326    
327    for (j = 0; j < utf8_table1_size; j++)
328      if (d <= utf8_table1[j]) break;
329    if (j != i) return -(i+1);
330    
331    /* Valid value */
332    
333    *vptr = d;
334    return i+1;
335  }  }
336    
337    #endif
338    
339    
340    
341  /* Character string printing function. */  /*************************************************
342    *       Convert character value to UTF-8         *
343    *************************************************/
344    
345  static void pchars(unsigned char *p, int length)  /* This function takes an integer value in the range 0 - 0x7fffffff
346    and encodes it as a UTF-8 character in 1 to 6 bytes.
347    
348    Arguments:
349      cvalue     the character value
350      utf8bytes  pointer to buffer for result - at least 6 bytes long
351    
352    Returns:     number of characters placed in the buffer
353    */
354    
355    #if !defined NOUTF8
356    
357    static int
358    ord2utf8(int cvalue, uschar *utf8bytes)
359  {  {
360  int c;  register int i, j;
361    for (i = 0; i < utf8_table1_size; i++)
362      if (cvalue <= utf8_table1[i]) break;
363    utf8bytes += i;
364    for (j = i; j > 0; j--)
365     {
366     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367     cvalue >>= 6;
368     }
369    *utf8bytes = utf8_table2[i] | cvalue;
370    return i + 1;
371    }
372    
373    #endif
374    
375    
376    
377    /*************************************************
378    *             Print character string             *
379    *************************************************/
380    
381    /* Character string printing function. Must handle UTF-8 strings in utf8
382    mode. Yields number of characters printed. If handed a NULL file, just counts
383    chars without printing. */
384    
385    static int pchars(unsigned char *p, int length, FILE *f)
386    {
387    int c = 0;
388    int yield = 0;
389    
390  while (length-- > 0)  while (length-- > 0)
391    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
392      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
393      if (use_utf8)
394        {
395        int rc = utf82ord(p, &c);
396    
397        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
398          {
399          length -= rc - 1;
400          p += rc;
401          if (PRINTHEX(c))
402            {
403            if (f != NULL) fprintf(f, "%c", c);
404            yield++;
405            }
406          else
407            {
408            int n = 4;
409            if (f != NULL) fprintf(f, "\\x{%02x}", c);
410            yield += (n <= 0x000000ff)? 2 :
411                     (n <= 0x00000fff)? 3 :
412                     (n <= 0x0000ffff)? 4 :
413                     (n <= 0x000fffff)? 5 : 6;
414            }
415          continue;
416          }
417        }
418    #endif
419    
420       /* Not UTF-8, or malformed UTF-8  */
421    
422      c = *p++;
423      if (PRINTHEX(c))
424        {
425        if (f != NULL) fprintf(f, "%c", c);
426        yield++;
427        }
428      else
429        {
430        if (f != NULL) fprintf(f, "\\x%02x", c);
431        yield += 4;
432        }
433      }
434    
435    return yield;
436    }
437    
438    
439    
440    /*************************************************
441    *              Callout function                  *
442    *************************************************/
443    
444    /* Called from PCRE as a result of the (?C) item. We print out where we are in
445    the match. Yield zero unless more callouts than the fail count, or the callout
446    data is not zero. */
447    
448    static int callout(pcre_callout_block *cb)
449    {
450    FILE *f = (first_callout | callout_extra)? outfile : NULL;
451    int i, pre_start, post_start, subject_length;
452    
453    if (callout_extra)
454      {
455      fprintf(f, "Callout %d: last capture = %d\n",
456        cb->callout_number, cb->capture_last);
457    
458      for (i = 0; i < cb->capture_top * 2; i += 2)
459        {
460        if (cb->offset_vector[i] < 0)
461          fprintf(f, "%2d: <unset>\n", i/2);
462        else
463          {
464          fprintf(f, "%2d: ", i/2);
465          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
466            cb->offset_vector[i+1] - cb->offset_vector[i], f);
467          fprintf(f, "\n");
468          }
469        }
470      }
471    
472    /* Re-print the subject in canonical form, the first time or if giving full
473    datails. On subsequent calls in the same match, we use pchars just to find the
474    printed lengths of the substrings. */
475    
476    if (f != NULL) fprintf(f, "--->");
477    
478    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
479    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
480      cb->current_position - cb->start_match, f);
481    
482    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
483    
484    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
485      cb->subject_length - cb->current_position, f);
486    
487    if (f != NULL) fprintf(f, "\n");
488    
489    /* Always print appropriate indicators, with callout number if not already
490    shown. For automatic callouts, show the pattern offset. */
491    
492    if (cb->callout_number == 255)
493      {
494      fprintf(outfile, "%+3d ", cb->pattern_position);
495      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
496      }
497    else
498      {
499      if (callout_extra) fprintf(outfile, "    ");
500        else fprintf(outfile, "%3d ", cb->callout_number);
501      }
502    
503    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
504    fprintf(outfile, "^");
505    
506    if (post_start > 0)
507      {
508      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
509      fprintf(outfile, "^");
510      }
511    
512    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
513      fprintf(outfile, " ");
514    
515    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
516      pbuffer + cb->pattern_position);
517    
518    fprintf(outfile, "\n");
519    first_callout = 0;
520    
521    if (cb->callout_data != NULL)
522      {
523      int callout_data = *((int *)(cb->callout_data));
524      if (callout_data != 0)
525        {
526        fprintf(outfile, "Callout data = %d\n", callout_data);
527        return callout_data;
528        }
529      }
530    
531    return (cb->callout_number != callout_fail_id)? 0 :
532           (++callout_count >= callout_fail_count)? 1 : 0;
533  }  }
534    
535    
536    /*************************************************
537    *            Local malloc functions              *
538    *************************************************/
539    
540  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
541  compiled re. */  compiled re. */
542    
543  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
544  {  {
545  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
546  return malloc(size);  gotten_store = size;
547    if (show_malloc)
548      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
549    return block;
550    }
551    
552    static void new_free(void *block)
553    {
554    if (show_malloc)
555      fprintf(outfile, "free             %p\n", block);
556    free(block);
557    }
558    
559    
560    /* For recursion malloc/free, to test stacking calls */
561    
562    static void *stack_malloc(size_t size)
563    {
564    void *block = malloc(size);
565    if (show_malloc)
566      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
567    return block;
568    }
569    
570    static void stack_free(void *block)
571    {
572    if (show_malloc)
573      fprintf(outfile, "stack_free       %p\n", block);
574    free(block);
575    }
576    
577    
578    /*************************************************
579    *          Call pcre_fullinfo()                  *
580    *************************************************/
581    
582    /* Get one piece of information from the pcre_fullinfo() function */
583    
584    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
585    {
586    int rc;
587    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
588      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
589    }
590    
591    
592    
593    /*************************************************
594    *         Byte flipping function                 *
595    *************************************************/
596    
/* Reverses the byte order of a 2-byte value (when n is 2) or of a 4-byte
value (for any other n). Bits above those bytes are discarded.

Arguments:
  value     the value to flip
  n         2 to swap the low two bytes; anything else reverses the low four

Returns:    the flipped value
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
606    
607    
608    
609    
610    /*************************************************
611    *        Check match or recursion limit          *
612    *************************************************/
613    
614    static int
615    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
616      int start_offset, int options, int *use_offsets, int use_size_offsets,
617      int flag, unsigned long int *limit, int errnumber, const char *msg)
618    {
619    int count;
620    int min = 0;
621    int mid = 64;
622    int max = -1;
623    
624    extra->flags |= flag;
625    
626    for (;;)
627      {
628      *limit = mid;
629    
630      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
631        use_offsets, use_size_offsets);
632    
633      if (count == errnumber)
634        {
635        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
636        min = mid;
637        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
638        }
639    
640      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
641                             count == PCRE_ERROR_PARTIAL)
642        {
643        if (mid == min + 1)
644          {
645          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
646          break;
647          }
648        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
649        max = mid;
650        mid = (min + mid)/2;
651        }
652      else break;    /* Some other error */
653      }
654    
655    extra->flags &= ~flag;
656    return count;
657    }
658    
659    
660    
661    /*************************************************
662    *         Check newline indicator                *
663    *************************************************/
664    
665    /* This is used both at compile and run-time to check for <xxx> escapes, where
666    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
667    no match.
668    
669    Arguments:
670      p           points after the leading '<'
671      f           file for error message
672    
673    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
674    */
675    
676    static int
677    check_newline(uschar *p, FILE *f)
678    {
679    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
680    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
681    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
682    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
683    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
684    fprintf(f, "Unknown newline type at: <%s\n", p);
685    return 0;
686    }
687    
688    
689    
690    /*************************************************
691    *             Usage function                     *
692    *************************************************/
693    
/* Writes the command-line summary to the standard output. The -dfa and -p
lines are included only when the corresponding support is compiled in. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
719    
720    
721    
722    /*************************************************
723    *                Main Program                    *
724    *************************************************/
725    
726  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
727  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
728  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 290  int options = 0; Line 734  int options = 0;
734  int study_options = 0;  int study_options = 0;
735  int op = 1;  int op = 1;
736  int timeit = 0;  int timeit = 0;
737    int timeitm = 0;
738  int showinfo = 0;  int showinfo = 0;
739    int showstore = 0;
740    int quiet = 0;
741    int size_offsets = 45;
742    int size_offsets_max;
743    int *offsets = NULL;
744    #if !defined NOPOSIX
745  int posix = 0;  int posix = 0;
746    #endif
747  int debug = 0;  int debug = 0;
748  int done = 0;  int done = 0;
749  unsigned char buffer[30000];  int all_use_dfa = 0;
750  unsigned char dbuffer[1024];  int yield = 0;
751    int stack_size;
752    
753    /* These vectors store, end-to-end, a list of captured substring names. Assume
754    that 1024 is plenty long enough for the few names we'll be testing. */
755    
756    uschar copynames[1024];
757    uschar getnames[1024];
758    
759    uschar *copynamesptr;
760    uschar *getnamesptr;
761    
762  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
763    when I am debugging. They grow automatically when very long lines are read. */
764    
765    buffer = (unsigned char *)malloc(buffer_size);
766    dbuffer = (unsigned char *)malloc(buffer_size);
767    pbuffer = (unsigned char *)malloc(buffer_size);
768    
769    /* The outfile variable is static so that new_malloc can use it. */
770    
771  outfile = stdout;  outfile = stdout;
772    
773    /* The following  _setmode() stuff is some Windows magic that tells its runtime
774    library to translate CRLF into a single LF character. At least, that's what
775    I've been told: never having used Windows I take this all on trust. Originally
776    it set 0x8000, but then I was advised that _O_BINARY was better. */
777    
778    #if defined(_WIN32) || defined(WIN32)
779    _setmode( _fileno( stdout ), _O_BINARY );
780    #endif
781    
782  /* Scan options */  /* Scan options */
783    
784  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
785    {    {
786    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
787    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
788      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
789        showstore = 1;
790      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
791      else if (strcmp(argv[op], "-b") == 0) debug = 1;
792    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
793    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
794    #if !defined NODFA
795      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
796    #endif
797      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
798          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
799            *endptr == 0))
800        {
801        op++;
802        argc--;
803        }
804      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
805        {
806        int both = argv[op][2] == 0;
807        int temp;
808        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
809                         *endptr == 0))
810          {
811          timeitm = temp;
812          op++;
813          argc--;
814          }
815        else timeitm = LOOPREPEAT;
816        if (both) timeit = timeitm;
817        }
818      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
819          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
820            *endptr == 0))
821        {
822    #if defined(_WIN32) || defined(WIN32)
823        printf("PCRE: -S not supported on this OS\n");
824        exit(1);
825    #else
826        int rc;
827        struct rlimit rlim;
828        getrlimit(RLIMIT_STACK, &rlim);
829        rlim.rlim_cur = stack_size * 1024 * 1024;
830        rc = setrlimit(RLIMIT_STACK, &rlim);
831        if (rc != 0)
832          {
833        printf("PCRE: setrlimit() failed with error %d\n", rc);
834        exit(1);
835          }
836        op++;
837        argc--;
838    #endif
839        }
840    #if !defined NOPOSIX
841    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
842    #endif
843      else if (strcmp(argv[op], "-C") == 0)
844        {
845        int rc;
846        printf("PCRE version %s\n", pcre_version());
847        printf("Compiled with\n");
848        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
849        printf("  %sUTF-8 support\n", rc? "" : "No ");
850        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
851        printf("  %sUnicode properties support\n", rc? "" : "No ");
852        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
853        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
854          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
855          (rc == -2)? "ANYCRLF" :
856          (rc == -1)? "ANY" : "???");
857        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
858        printf("  Internal link size = %d\n", rc);
859        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
860        printf("  POSIX malloc threshold = %d\n", rc);
861        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
862        printf("  Default match limit = %d\n", rc);
863        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
864        printf("  Default recursion depth limit = %d\n", rc);
865        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
866        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
867        goto EXIT;
868        }
869      else if (strcmp(argv[op], "-help") == 0 ||
870               strcmp(argv[op], "--help") == 0)
871        {
872        usage();
873        goto EXIT;
874        }
875    else    else
876      {      {
877      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
878      return 1;      usage();
879        yield = 1;
880        goto EXIT;
881      }      }
882    op++;    op++;
883    argc--;    argc--;
884    }    }
885    
886    /* Get the store for the offsets vector, and remember what it was */
887    
888    size_offsets_max = size_offsets;
889    offsets = (int *)malloc(size_offsets_max * sizeof(int));
890    if (offsets == NULL)
891      {
892      printf("** Failed to get %d bytes of memory for offsets vector\n",
893        (int)(size_offsets_max * sizeof(int)));
894      yield = 1;
895      goto EXIT;
896      }
897    
898  /* Sort out the input and output files */  /* Sort out the input and output files */
899    
900  if (argc > 1)  if (argc > 1)
901    {    {
902    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
903    if (infile == NULL)    if (infile == NULL)
904      {      {
905      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
906      return 1;      yield = 1;
907        goto EXIT;
908      }      }
909    }    }
910    
911  if (argc > 2)  if (argc > 2)
912    {    {
913    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
914    if (outfile == NULL)    if (outfile == NULL)
915      {      {
916      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
917      return 1;      yield = 1;
918        goto EXIT;
919      }      }
920    }    }
921    
922  /* Set alternative malloc function */  /* Set alternative malloc function */
923    
924  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
925    pcre_free = new_free;
926    pcre_stack_malloc = stack_malloc;
927    pcre_stack_free = stack_free;
928    
929  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
930    
931  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
932    
933  /* Main loop */  /* Main loop */
934    
# Line 355  while (!done) Line 936  while (!done)
936    {    {
937    pcre *re = NULL;    pcre *re = NULL;
938    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
939    
940    #if !defined NOPOSIX  /* There are still compilers that require no indent */
941    regex_t preg;    regex_t preg;
942      int do_posix = 0;
943    #endif
944    
945    const char *error;    const char *error;
946    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
947      unsigned char *to_file = NULL;
948      const unsigned char *tables = NULL;
949      unsigned long int true_size, true_study_size = 0;
950      size_t size, regex_gotten_store;
951    int do_study = 0;    int do_study = 0;
952    int do_debug = 0;    int do_debug = debug;
953    int do_posix = 0;    int debug_lengths = 1;
954    int erroroffset, len, delimiter;    int do_G = 0;
955      int do_g = 0;
956      int do_showinfo = showinfo;
957      int do_showrest = 0;
958      int do_flip = 0;
959      int erroroffset, len, delimiter, poffset;
960    
961      use_utf8 = 0;
962    
963    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
964    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
965    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
966      fflush(outfile);
967    
968    p = buffer;    p = buffer;
969    while (isspace(*p)) p++;    while (isspace(*p)) p++;
970    if (*p == 0) continue;    if (*p == 0) continue;
971    
972    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
973    complete, read more. */  
974      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
975        {
976        unsigned long int magic, get_options;
977        uschar sbuf[8];
978        FILE *f;
979    
980        p++;
981        pp = p + (int)strlen((char *)p);
982        while (isspace(pp[-1])) pp--;
983        *pp = 0;
984    
985        f = fopen((char *)p, "rb");
986        if (f == NULL)
987          {
988          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
989          continue;
990          }
991    
992        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
993    
994        true_size =
995          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
996        true_study_size =
997          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
998    
999        re = (real_pcre *)new_malloc(true_size);
1000        regex_gotten_store = gotten_store;
1001    
1002        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1003    
1004        magic = ((real_pcre *)re)->magic_number;
1005        if (magic != MAGIC_NUMBER)
1006          {
1007          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1008            {
1009            do_flip = 1;
1010            }
1011          else
1012            {
1013            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1014            fclose(f);
1015            continue;
1016            }
1017          }
1018    
1019        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1020          do_flip? " (byte-inverted)" : "", p);
1021    
1022        /* Need to know if UTF-8 for printing data strings */
1023    
1024        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1025        use_utf8 = (get_options & PCRE_UTF8) != 0;
1026    
1027        /* Now see if there is any following study data */
1028    
1029        if (true_study_size != 0)
1030          {
1031          pcre_study_data *psd;
1032    
1033          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1034          extra->flags = PCRE_EXTRA_STUDY_DATA;
1035    
1036          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1037          extra->study_data = psd;
1038    
1039          if (fread(psd, 1, true_study_size, f) != true_study_size)
1040            {
1041            FAIL_READ:
1042            fprintf(outfile, "Failed to read data from %s\n", p);
1043            if (extra != NULL) new_free(extra);
1044            if (re != NULL) new_free(re);
1045            fclose(f);
1046            continue;
1047            }
1048          fprintf(outfile, "Study data loaded from %s\n", p);
1049          do_study = 1;     /* To get the data output if requested */
1050          }
1051        else fprintf(outfile, "No study data\n");
1052    
1053        fclose(f);
1054        goto SHOW_INFO;
1055        }
1056    
1057      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1058      the pattern; if it isn't complete, read more. */
1059    
1060    delimiter = *p++;    delimiter = *p++;
1061    
1062    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
1063      {      {
1064      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1065      goto SKIP_DATA;      goto SKIP_DATA;
1066      }      }
1067    
1068    pp = p;    pp = p;
1069      poffset = p - buffer;
1070    
1071    for(;;)    for(;;)
1072      {      {
1073      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
     if (*pp != 0) break;  
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
1074        {        {
1075        fprintf(outfile, "** Expression too long - missing delimiter?\n");        if (*pp == '\\' && pp[1] != 0) pp++;
1076        goto SKIP_DATA;          else if (*pp == delimiter) break;
1077          pp++;
1078        }        }
1079        if (*pp != 0) break;
1080      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1081      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1082        {        {
1083        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1084        done = 1;        done = 1;
# Line 406  while (!done) Line 1087  while (!done)
1087      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1088      }      }
1089    
1090    /* Terminate the pattern at the delimiter */    /* The buffer may have moved while being extended; reset the start of data
1091      pointer to the correct relative point in the buffer. */
1092    
1093      p = buffer + poffset;
1094    
1095      /* If the first character after the delimiter is backslash, make
1096      the pattern end with backslash. This is purely to provide a way
1097      of testing for the error message when a pattern ends with backslash. */
1098    
1099      if (pp[1] == '\\') *pp++ = '\\';
1100    
1101      /* Terminate the pattern at the delimiter, and save a copy of the pattern
1102      for callouts. */
1103    
1104    *pp++ = 0;    *pp++ = 0;
1105      strcpy((char *)pbuffer, (char *)p);
1106    
1107    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1108    
1109    options = 0;    options = 0;
1110    study_options = 0;    study_options = 0;
1111      log_store = showstore;  /* default from command line */
1112    
1113    while (*pp != 0)    while (*pp != 0)
1114      {      {
1115      switch (*pp++)      switch (*pp++)
1116        {        {
1117          case 'f': options |= PCRE_FIRSTLINE; break;
1118          case 'g': do_g = 1; break;
1119        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1120        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1121        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1122        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1123    
1124          case '+': do_showrest = 1; break;
1125        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1126        case 'D': do_debug = 1; break;        case 'B': do_debug = 1; break;
1127          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1128          case 'D': do_debug = do_showinfo = 1; break;
1129        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1130          case 'F': do_flip = 1; break;
1131          case 'G': do_G = 1; break;
1132          case 'I': do_showinfo = 1; break;
1133          case 'J': options |= PCRE_DUPNAMES; break;
1134          case 'M': log_store = 1; break;
1135          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1136    
1137    #if !defined NOPOSIX
1138        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1139    #endif
1140    
1141        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1142        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1143        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1144        case '\n': case ' ': break;        case 'Z': debug_lengths = 0; break;
1145          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1146          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1147    
1148          case 'L':
1149          ppp = pp;
1150          /* The '\r' test here is so that it works on Windows. */
1151          /* The '0' test is just in case this is an unterminated line. */
1152          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1153          *ppp = 0;
1154          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1155            {
1156            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1157            goto SKIP_DATA;
1158            }
1159          locale_set = 1;
1160          tables = pcre_maketables();
1161          pp = ppp;
1162          break;
1163    
1164          case '>':
1165          to_file = pp;
1166          while (*pp != 0) pp++;
1167          while (isspace(pp[-1])) pp--;
1168          *pp = 0;
1169          break;
1170    
1171          case '<':
1172            {
1173            int x = check_newline(pp, outfile);
1174            if (x == 0) goto SKIP_DATA;
1175            options |= x;
1176            while (*pp++ != '>');
1177            }
1178          break;
1179    
1180          case '\r':                      /* So that it works in Windows */
1181          case '\n':
1182          case ' ':
1183          break;
1184    
1185        default:        default:
1186        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1187        goto SKIP_DATA;        goto SKIP_DATA;
# Line 437  while (!done) Line 1189  while (!done)
1189      }      }
1190    
1191    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1192    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
1193      local character tables. */
1194    
1195    #if !defined NOPOSIX
1196    if (posix || do_posix)    if (posix || do_posix)
1197      {      {
1198      int rc;      int rc;
1199      int cflags = 0;      int cflags = 0;
1200    
1201      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1202      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1203        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1204        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1205        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1206    
1207      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1208    
1209      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 452  while (!done) Line 1211  while (!done)
1211    
1212      if (rc != 0)      if (rc != 0)
1213        {        {
1214        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1215        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1216        goto SKIP_DATA;        goto SKIP_DATA;
1217        }        }
# Line 461  while (!done) Line 1220  while (!done)
1220    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1221    
1222    else    else
1223    #endif  /* !defined NOPOSIX */
1224    
1225      {      {
1226      if (timeit)      if (timeit > 0)
1227        {        {
1228        register int i;        register int i;
1229        clock_t time_taken;        clock_t time_taken;
1230        clock_t start_time = clock();        clock_t start_time = clock();
1231        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1232          {          {
1233          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1234          if (re != NULL) free(re);          if (re != NULL) free(re);
1235          }          }
1236        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1237        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1238          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)timeit) /
1239              (double)CLOCKS_PER_SEC);
1240        }        }
1241    
1242      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1243    
1244      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1245      if non-interactive. */      if non-interactive. */
# Line 490  while (!done) Line 1252  while (!done)
1252          {          {
1253          for (;;)          for (;;)
1254            {            {
1255            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1256              {              {
1257              done = 1;              done = 1;
1258              goto CONTINUE;              goto CONTINUE;
# Line 501  while (!done) Line 1263  while (!done)
1263            }            }
1264          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1265          }          }
1266        continue;        goto CONTINUE;
1267          }
1268    
1269        /* Compilation succeeded; print data if required. There are now two
1270        info-returning functions. The old one has a limited interface and
1271        returns only limited data. Check that it agrees with the newer one. */
1272    
1273        if (log_store)
1274          fprintf(outfile, "Memory allocation (code space): %d\n",
1275            (int)(gotten_store -
1276                  sizeof(real_pcre) -
1277                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1278    
1279        /* Extract the size for possible writing before possibly flipping it,
1280        and remember the store that was got. */
1281    
1282        true_size = ((real_pcre *)re)->size;
1283        regex_gotten_store = gotten_store;
1284    
1285        /* If /S was present, study the regexp to generate additional info to
1286        help with the matching. */
1287    
1288        if (do_study)
1289          {
1290          if (timeit > 0)
1291            {
1292            register int i;
1293            clock_t time_taken;
1294            clock_t start_time = clock();
1295            for (i = 0; i < timeit; i++)
1296              extra = pcre_study(re, study_options, &error);
1297            time_taken = clock() - start_time;
1298            if (extra != NULL) free(extra);
1299            fprintf(outfile, "  Study time %.4f milliseconds\n",
1300              (((double)time_taken * 1000.0) / (double)timeit) /
1301                (double)CLOCKS_PER_SEC);
1302            }
1303          extra = pcre_study(re, study_options, &error);
1304          if (error != NULL)
1305            fprintf(outfile, "Failed to study: %s\n", error);
1306          else if (extra != NULL)
1307            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1308          }
1309    
1310        /* If the 'F' option was present, we flip the bytes of all the integer
1311        fields in the regex data block and the study block. This is to make it
1312        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1313        compiled on a different architecture. */
1314    
1315        if (do_flip)
1316          {
1317          real_pcre *rre = (real_pcre *)re;
1318          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1319          rre->size = byteflip(rre->size, sizeof(rre->size));
1320          rre->options = byteflip(rre->options, sizeof(rre->options));
1321          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1322          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1323          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1324          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1325          rre->name_table_offset = byteflip(rre->name_table_offset,
1326            sizeof(rre->name_table_offset));
1327          rre->name_entry_size = byteflip(rre->name_entry_size,
1328            sizeof(rre->name_entry_size));
1329          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1330    
1331          if (extra != NULL)
1332            {
1333            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1334            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1335            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1336            }
1337        }        }
1338    
1339      /* Compilation succeeded; print data if required */      /* Extract information from the compiled data if required */
1340    
1341        SHOW_INFO:
1342    
1343      if (showinfo || do_debug)      if (do_debug)
1344        {        {
1345        int first_char, count;        fprintf(outfile, "------------------------------------------------------------------\n");
1346          pcre_printint(re, outfile, debug_lengths);
1347          }
1348    
1349        if (debug || do_debug) print_internals(re, outfile);      if (do_showinfo)
1350          {
1351          unsigned long int get_options, all_options;
1352    #if !defined NOINFOCHECK
1353          int old_first_char, old_options, old_count;
1354    #endif
1355          int count, backrefmax, first_char, need_char, okpartial, jchanged;
1356          int nameentrysize, namecount;
1357          const uschar *nametable;
1358    
1359          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1360          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1361          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1362          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1363          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1364          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1365          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1366          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1367          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1368          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1369          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1370    
1371        count = pcre_info(re, &options, &first_char);  #if !defined NOINFOCHECK
1372          old_count = pcre_info(re, &old_options, &old_first_char);
1373        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1374          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
1375        else        else
1376          {          {
1377          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
1378          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1379            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
1380              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
1381              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
1382              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1383              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
1384              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
1385              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
1386              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1387              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
1388          if (first_char == -1)          }
1389            {  #endif
1390            fprintf(outfile, "First char at start or follows \\n\n");  
1391            }        if (size != regex_gotten_store) fprintf(outfile,
1392          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1393            (int)size, (int)regex_gotten_store);
1394    
1395          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1396          if (backrefmax > 0)
1397            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1398    
1399          if (namecount > 0)
1400            {
1401            fprintf(outfile, "Named capturing subpatterns:\n");
1402            while (namecount-- > 0)
1403            {            {
1404            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1405                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1406                GET2(nametable, 0));
1407              nametable += nameentrysize;
1408            }            }
1409            }
1410    
1411          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1412    
1413          all_options = ((real_pcre *)re)->options;
1414          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1415    
1416          if (get_options == 0) fprintf(outfile, "No options\n");
1417            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1418              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1419              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1420              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1421              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1422              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1423              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1424              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1425              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1426              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1427              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1428              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1429              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1430              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1431    
1432          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1433    
1434          switch (get_options & PCRE_NEWLINE_BITS)
1435            {
1436            case PCRE_NEWLINE_CR:
1437            fprintf(outfile, "Forced newline sequence: CR\n");
1438            break;
1439    
1440            case PCRE_NEWLINE_LF:
1441            fprintf(outfile, "Forced newline sequence: LF\n");
1442            break;
1443    
1444            case PCRE_NEWLINE_CRLF:
1445            fprintf(outfile, "Forced newline sequence: CRLF\n");
1446            break;
1447    
1448            case PCRE_NEWLINE_ANYCRLF:
1449            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1450            break;
1451    
1452            case PCRE_NEWLINE_ANY:
1453            fprintf(outfile, "Forced newline sequence: ANY\n");
1454            break;
1455    
1456            default:
1457            break;
1458            }
1459    
1460          if (first_char == -1)
1461            {
1462            fprintf(outfile, "First char at start or follows newline\n");
1463            }
1464          else if (first_char < 0)
1465            {
1466            fprintf(outfile, "No first char\n");
1467            }
1468          else
1469            {
1470            int ch = first_char & 255;
1471            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1472              "" : " (caseless)";
1473            if (PRINTHEX(ch))
1474              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1475            else
1476              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1477            }
1478    
1479          if (need_char < 0)
1480            {
1481            fprintf(outfile, "No need char\n");
1482            }
1483          else
1484            {
1485            int ch = need_char & 255;
1486            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1487              "" : " (caseless)";
1488            if (PRINTHEX(ch))
1489              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1490            else
1491              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1492            }
1493    
1494          /* Don't output study size; at present it is in any case a fixed
1495          value, but it varies, depending on the computer architecture, and
1496          so messes up the test suite. (And with the /F option, it might be
1497          flipped.) */
1498    
1499          if (do_study)
1500            {
1501            if (extra == NULL)
1502              fprintf(outfile, "Study returned NULL\n");
1503          else          else
1504            {            {
1505            if (isprint(first_char))            uschar *start_bits = NULL;
1506              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1507    
1508              if (start_bits == NULL)
1509                fprintf(outfile, "No starting byte set\n");
1510            else            else
1511              fprintf(outfile, "First char = %d\n", first_char);              {
1512                int i;
1513                int c = 24;
1514                fprintf(outfile, "Starting byte set: ");
1515                for (i = 0; i < 256; i++)
1516                  {
1517                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1518                    {
1519                    if (c > 75)
1520                      {
1521                      fprintf(outfile, "\n  ");
1522                      c = 2;
1523                      }
1524                    if (PRINTHEX(i) && i != ' ')
1525                      {
1526                      fprintf(outfile, "%c ", i);
1527                      c += 2;
1528                      }
1529                    else
1530                      {
1531                      fprintf(outfile, "\\x%02x ", i);
1532                      c += 5;
1533                      }
1534                    }
1535                  }
1536                fprintf(outfile, "\n");
1537                }
1538            }            }
1539          }          }
1540        }        }
1541    
1542      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1543      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1544        the study length, in big-endian order. */
1545    
1546      if (do_study)      if (to_file != NULL)
1547        {        {
1548        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1549          if (f == NULL)
1550          {          {
1551          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.2f milliseconds\n",  
           ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
1552          }          }
1553          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
1554          {          {
1555          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1556          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (true_size >> 24)  & 255;
1557            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1558            sbuf[2] = (true_size >>  8)  & 255;
1559            sbuf[3] = (true_size)  & 255;
1560    
1561            sbuf[4] = (true_study_size >> 24)  & 255;
1562            sbuf[5] = (true_study_size >> 16)  & 255;
1563            sbuf[6] = (true_study_size >>  8)  & 255;
1564            sbuf[7] = (true_study_size)  & 255;
1565    
1566            if (fwrite(sbuf, 1, 8, f) < 8 ||
1567                fwrite(re, 1, true_size, f) < true_size)
1568              {
1569              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1570              }
1571          else          else
1572            {            {
1573            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1574            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1575              {              {
1576              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1577                    true_study_size)
1578                {                {
1579                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1580                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1581                }                }
1582                else fprintf(outfile, "Study data written to %s\n", to_file);
1583    
1584              }              }
           fprintf(outfile, "\n");  
1585            }            }
1586            fclose(f);
1587          }          }
1588    
1589          new_free(re);
1590          if (extra != NULL) new_free(extra);
1591          if (tables != NULL) new_free((void *)tables);
1592          continue;  /* With next regex */
1593        }        }
1594      }      }        /* End of non-POSIX compile */
1595    
1596    /* Read data lines and test them */    /* Read data lines and test them */
1597    
1598    for (;;)    for (;;)
1599      {      {
1600      unsigned char *q;      uschar *q;
1601        uschar *bptr;
1602        int *use_offsets = offsets;
1603        int use_size_offsets = size_offsets;
1604        int callout_data = 0;
1605        int callout_data_set = 0;
1606      int count, c;      int count, c;
1607      int offsets[45];      int copystrings = 0;
1608      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
1609        int getstrings = 0;
1610        int getlist = 0;
1611        int gmatched = 0;
1612        int start_offset = 0;
1613        int g_notempty = 0;
1614        int use_dfa = 0;
1615    
1616      options = 0;      options = 0;
1617    
1618      if (infile == stdin) printf("  data> ");      *copynames = 0;
1619      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1620    
1621        copynamesptr = copynames;
1622        getnamesptr = getnames;
1623    
1624        pcre_callout = callout;
1625        first_callout = 1;
1626        callout_extra = 0;
1627        callout_count = 0;
1628        callout_fail_count = 999999;
1629        callout_fail_id = -1;
1630        show_malloc = 0;
1631    
1632        if (extra != NULL) extra->flags &=
1633          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1634    
1635        len = 0;
1636        for (;;)
1637        {        {
1638        done = 1;        if (infile == stdin) printf("data> ");
1639        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1640            {
1641            if (len > 0) break;
1642            done = 1;
1643            goto CONTINUE;
1644            }
1645          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1646          len = (int)strlen((char *)buffer);
1647          if (buffer[len-1] == '\n') break;
1648        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1649    
     len = (int)strlen((char *)buffer);  
1650      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1651      buffer[len] = 0;      buffer[len] = 0;
1652      if (len == 0) break;      if (len == 0) break;
# Line 637  while (!done) Line 1654  while (!done)
1654      p = buffer;      p = buffer;
1655      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1656    
1657      q = dbuffer;      bptr = q = dbuffer;
1658      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1659        {        {
1660        int i = 0;        int i = 0;
1661        int n = 0;        int n = 0;
1662    
1663        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1664          {          {
1665          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 658  while (!done) Line 1676  while (!done)
1676          c -= '0';          c -= '0';
1677          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1678            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1679    
1680    #if !defined NOUTF8
1681            if (use_utf8 && c > 255)
1682              {
1683              unsigned char buff8[8];
1684              int ii, utn;
1685              utn = ord2utf8(c, buff8);
1686              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1687              c = buff8[ii];   /* Last byte */
1688              }
1689    #endif
1690          break;          break;
1691    
1692          case 'x':          case 'x':
1693    
1694            /* Handle \x{..} specially - new Perl thing for utf8 */
1695    
1696    #if !defined NOUTF8
1697            if (*p == '{')
1698              {
1699              unsigned char *pt = p;
1700              c = 0;
1701              while (isxdigit(*(++pt)))
1702                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1703              if (*pt == '}')
1704                {
1705                unsigned char buff8[8];
1706                int ii, utn;
1707                utn = ord2utf8(c, buff8);
1708                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1709                c = buff8[ii];   /* Last byte */
1710                p = pt + 1;
1711                break;
1712                }
1713              /* Not correct form; fall through */
1714              }
1715    #endif
1716    
1717            /* Ordinary \x */
1718    
1719          c = 0;          c = 0;
1720          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1721            {            {
# Line 669  while (!done) Line 1724  while (!done)
1724            }            }
1725          break;          break;
1726    
1727          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1728          p--;          p--;
1729          continue;          continue;
1730    
1731            case '>':
1732            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1733            continue;
1734    
1735          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1736          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1737          continue;          continue;
# Line 681  while (!done) Line 1740  while (!done)
1740          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1741          continue;          continue;
1742    
1743            case 'C':
1744            if (isdigit(*p))    /* Set copy string */
1745              {
1746              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1747              copystrings |= 1 << n;
1748              }
1749            else if (isalnum(*p))
1750              {
1751              uschar *npp = copynamesptr;
1752              while (isalnum(*p)) *npp++ = *p++;
1753              *npp++ = 0;
1754              *npp = 0;
1755              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1756              if (n < 0)
1757                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1758              copynamesptr = npp;
1759              }
1760            else if (*p == '+')
1761              {
1762              callout_extra = 1;
1763              p++;
1764              }
1765            else if (*p == '-')
1766              {
1767              pcre_callout = NULL;
1768              p++;
1769              }
1770            else if (*p == '!')
1771              {
1772              callout_fail_id = 0;
1773              p++;
1774              while(isdigit(*p))
1775                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1776              callout_fail_count = 0;
1777              if (*p == '!')
1778                {
1779                p++;
1780                while(isdigit(*p))
1781                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1782                }
1783              }
1784            else if (*p == '*')
1785              {
1786              int sign = 1;
1787              callout_data = 0;
1788              if (*(++p) == '-') { sign = -1; p++; }
1789              while(isdigit(*p))
1790                callout_data = callout_data * 10 + *p++ - '0';
1791              callout_data *= sign;
1792              callout_data_set = 1;
1793              }
1794            continue;
1795    
1796    #if !defined NODFA
1797            case 'D':
1798    #if !defined NOPOSIX
1799            if (posix || do_posix)
1800              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1801            else
1802    #endif
1803              use_dfa = 1;
1804            continue;
1805    
1806            case 'F':
1807            options |= PCRE_DFA_SHORTEST;
1808            continue;
1809    #endif
1810    
1811            case 'G':
1812            if (isdigit(*p))
1813              {
1814              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1815              getstrings |= 1 << n;
1816              }
1817            else if (isalnum(*p))
1818              {
1819              uschar *npp = getnamesptr;
1820              while (isalnum(*p)) *npp++ = *p++;
1821              *npp++ = 0;
1822              *npp = 0;
1823              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1824              if (n < 0)
1825                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1826              getnamesptr = npp;
1827              }
1828            continue;
1829    
1830            case 'L':
1831            getlist = 1;
1832            continue;
1833    
1834            case 'M':
1835            find_match_limit = 1;
1836            continue;
1837    
1838            case 'N':
1839            options |= PCRE_NOTEMPTY;
1840            continue;
1841    
1842          case 'O':          case 'O':
1843          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1844          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1845              {
1846              size_offsets_max = n;
1847              free(offsets);
1848              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1849              if (offsets == NULL)
1850                {
1851                printf("** Failed to get %d bytes of memory for offsets vector\n",
1852                  (int)(size_offsets_max * sizeof(int)));
1853                yield = 1;
1854                goto EXIT;
1855                }
1856              }
1857            use_size_offsets = n;
1858            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1859            continue;
1860    
1861            case 'P':
1862            options |= PCRE_PARTIAL;
1863            continue;
1864    
1865            case 'Q':
1866            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1867            if (extra == NULL)
1868              {
1869              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1870              extra->flags = 0;
1871              }
1872            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1873            extra->match_limit_recursion = n;
1874            continue;
1875    
1876            case 'q':
1877            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1878            if (extra == NULL)
1879              {
1880              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1881              extra->flags = 0;
1882              }
1883            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1884            extra->match_limit = n;
1885            continue;
1886    
1887    #if !defined NODFA
1888            case 'R':
1889            options |= PCRE_DFA_RESTART;
1890            continue;
1891    #endif
1892    
1893            case 'S':
1894            show_malloc = 1;
1895          continue;          continue;
1896    
1897          case 'Z':          case 'Z':
1898          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1899          continue;          continue;
1900    
1901            case '?':
1902            options |= PCRE_NO_UTF8_CHECK;
1903            continue;
1904    
1905            case '<':
1906              {
1907              int x = check_newline(p, outfile);
1908              if (x == 0) goto NEXT_DATA;
1909              options |= x;
1910              while (*p++ != '>');
1911              }
1912            continue;
1913          }          }
1914        *q++ = c;        *q++ = c;
1915        }        }
1916      *q = 0;      *q = 0;
1917      len = q - dbuffer;      len = q - dbuffer;
1918    
1919        if ((all_use_dfa || use_dfa) && find_match_limit)
1920          {
1921          printf("**Match limit not relevant for DFA matching: ignored\n");
1922          find_match_limit = 0;
1923          }
1924    
1925      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1926      support timing. */      support timing or playing with the match limit or callout data. */
1927    
1928    #if !defined NOPOSIX
1929      if (posix || do_posix)      if (posix || do_posix)
1930        {        {
1931        int rc;        int rc;
1932        int eflags = 0;        int eflags = 0;
1933        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1934          if (use_size_offsets > 0)
1935            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1936        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1937        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1938    
1939        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1940    
1941        if (rc != 0)        if (rc != 0)
1942          {          {
1943          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1944          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1945          }          }
1946          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1947                  != 0)
1948            {
1949            fprintf(outfile, "Matched with REG_NOSUB\n");
1950            }
1951        else        else
1952          {          {
1953          size_t i;          size_t i;
1954          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1955            {            {
1956            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1957              {              {
1958              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1959              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1960                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1961              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1962                if (i == 0 && do_showrest)
1963                  {
1964                  fprintf(outfile, " 0+ ");
1965                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1966                    outfile);
1967                  fprintf(outfile, "\n");
1968                  }
1969              }              }
1970            }            }
1971          }          }
1972          free(pmatch);
1973        }        }
1974    
1975      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1976    
1977      else      else
1978    #endif  /* !defined NOPOSIX */
1979    
1980        for (;; gmatched++)    /* Loop for /g or /G */
1981        {        {
1982        if (timeit)        if (timeitm > 0)
1983          {          {
1984          register int i;          register int i;
1985          clock_t time_taken;          clock_t time_taken;
1986          clock_t start_time = clock();          clock_t start_time = clock();
1987          for (i = 0; i < 4000; i++)  
1988            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
1989              size_offsets);          if (all_use_dfa || use_dfa)
1990              {
1991              int workspace[1000];
1992              for (i = 0; i < timeitm; i++)
1993                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1994                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1995                  sizeof(workspace)/sizeof(int));
1996              }
1997            else
1998    #endif
1999    
2000            for (i = 0; i < timeitm; i++)
2001              count = pcre_exec(re, extra, (char *)bptr, len,
2002                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2003    
2004          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2005          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2006            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeitm) /
2007                (double)CLOCKS_PER_SEC);
2008            }
2009    
2010          /* If find_match_limit is set, we want to do repeated matches with
2011          varying limits in order to find the minimum value for the match limit and
2012          for the recursion limit. */
2013    
2014          if (find_match_limit)
2015            {
2016            if (extra == NULL)
2017              {
2018              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2019              extra->flags = 0;
2020              }
2021    
2022            (void)check_match_limit(re, extra, bptr, len, start_offset,
2023              options|g_notempty, use_offsets, use_size_offsets,
2024              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2025              PCRE_ERROR_MATCHLIMIT, "match()");
2026    
2027            count = check_match_limit(re, extra, bptr, len, start_offset,
2028              options|g_notempty, use_offsets, use_size_offsets,
2029              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2030              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2031            }
2032    
2033          /* If callout_data is set, use the interface with additional data */
2034    
2035          else if (callout_data_set)
2036            {
2037            if (extra == NULL)
2038              {
2039              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2040              extra->flags = 0;
2041              }
2042            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2043            extra->callout_data = &callout_data;
2044            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2045              options | g_notempty, use_offsets, use_size_offsets);
2046            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2047          }          }
2048    
2049        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
2050          size_offsets);        value of match_limit. */
2051    
2052    #if !defined NODFA
2053          else if (all_use_dfa || use_dfa)
2054            {
2055            int workspace[1000];
2056            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2057              options | g_notempty, use_offsets, use_size_offsets, workspace,
2058              sizeof(workspace)/sizeof(int));
2059            if (count == 0)
2060              {
2061              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2062              count = use_size_offsets/2;
2063              }
2064            }
2065    #endif
2066    
2067        if (count == 0)        else
2068          {          {
2069          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2070          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2071            if (count == 0)
2072              {
2073              fprintf(outfile, "Matched, but too many substrings\n");
2074              count = use_size_offsets/3;
2075              }
2076          }          }
2077    
2078          /* Matched */
2079    
2080        if (count >= 0)        if (count >= 0)
2081          {          {
2082          int i;          int i, maxcount;
2083          count *= 2;  
2084          for (i = 0; i < count; i += 2)  #if !defined NODFA
2085            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2086    #endif
2087              maxcount = use_size_offsets/3;
2088    
2089            /* This is a check against a lunatic return value. */
2090    
2091            if (count > maxcount)
2092              {
2093              fprintf(outfile,
2094                "** PCRE error: returned count %d is too big for offset size %d\n",
2095                count, use_size_offsets);
2096              count = use_size_offsets/3;
2097              if (do_g || do_G)
2098                {
2099                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2100                do_g = do_G = FALSE;        /* Break g/G loop */
2101                }
2102              }
2103    
2104            for (i = 0; i < count * 2; i += 2)
2105            {            {
2106            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2107              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2108            else            else
2109              {              {
2110              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2111              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2112                  use_offsets[i+1] - use_offsets[i], outfile);
2113              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2114                if (i == 0)
2115                  {
2116                  if (do_showrest)
2117                    {
2118                    fprintf(outfile, " 0+ ");
2119                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2120                      outfile);
2121                    fprintf(outfile, "\n");
2122                    }
2123                  }
2124                }
2125              }
2126    
2127            for (i = 0; i < 32; i++)
2128              {
2129              if ((copystrings & (1 << i)) != 0)
2130                {
2131                char copybuffer[256];
2132                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2133                  i, copybuffer, sizeof(copybuffer));
2134                if (rc < 0)
2135                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2136                else
2137                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2138                }
2139              }
2140    
2141            for (copynamesptr = copynames;
2142                 *copynamesptr != 0;
2143                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2144              {
2145              char copybuffer[256];
2146              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2147                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2148              if (rc < 0)
2149                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2150              else
2151                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2152              }
2153    
2154            for (i = 0; i < 32; i++)
2155              {
2156              if ((getstrings & (1 << i)) != 0)
2157                {
2158                const char *substring;
2159                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2160                  i, &substring);
2161                if (rc < 0)
2162                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2163                else
2164                  {
2165                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2166                  pcre_free_substring(substring);
2167                  }
2168                }
2169              }
2170    
2171            for (getnamesptr = getnames;
2172                 *getnamesptr != 0;
2173                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2174              {
2175              const char *substring;
2176              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2177                count, (char *)getnamesptr, &substring);
2178              if (rc < 0)
2179                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2180              else
2181                {
2182                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2183                pcre_free_substring(substring);
2184              }              }
2185            }            }
2186    
2187            if (getlist)
2188              {
2189              const char **stringlist;
2190              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2191                &stringlist);
2192              if (rc < 0)
2193                fprintf(outfile, "get substring list failed %d\n", rc);
2194              else
2195                {
2196                for (i = 0; i < count; i++)
2197                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2198                if (stringlist[i] != NULL)
2199                  fprintf(outfile, "string list not terminated by NULL\n");
2200                /* free((void *)stringlist); */
2201                pcre_free_substring_list(stringlist);
2202                }
2203              }
2204            }
2205    
2206          /* There was a partial match */
2207    
2208          else if (count == PCRE_ERROR_PARTIAL)
2209            {
2210            fprintf(outfile, "Partial match");
2211    #if !defined NODFA
2212            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2213              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2214                bptr + use_offsets[0]);
2215    #endif
2216            fprintf(outfile, "\n");
2217            break;  /* Out of the /g loop */
2218          }          }
2219    
2220          /* Failed to match. If this is a /g or /G loop and we previously set
2221          g_notempty after a null match, this is not necessarily the end. We want
2222          to advance the start offset, and continue. We won't be at the end of the
2223          string - that was checked before setting g_notempty.
2224    
2225          Complication arises in the case when the newline option is "any" or
2226          "anycrlf". If the previous match was at the end of a line terminated by
2227          CRLF, an advance of one character just passes the \r, whereas we should
2228          prefer the longer newline sequence, as does the code in pcre_exec().
2229          Fudge the offset value to achieve this.
2230    
2231          Otherwise, in the case of UTF-8 matching, the advance must be one
2232          character, not one byte. */
2233    
2234        else        else
2235          {          {
2236          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2237              {
2238              int onechar = 1;
2239              unsigned int obits = ((real_pcre *)re)->options;
2240              use_offsets[0] = start_offset;
2241              if ((obits & PCRE_NEWLINE_BITS) == 0)
2242                {
2243                int d;
2244                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2245                obits = (d == '\r')? PCRE_NEWLINE_CR :
2246                        (d == '\n')? PCRE_NEWLINE_LF :
2247                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2248                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2249                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2250                }
2251              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2252                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2253                  &&
2254                  start_offset < len - 1 &&
2255                  bptr[start_offset] == '\r' &&
2256                  bptr[start_offset+1] == '\n')
2257                onechar++;
2258              else if (use_utf8)
2259                {
2260                while (start_offset + onechar < len)
2261                  {
2262                  int tb = bptr[start_offset+onechar];
2263                  if (tb <= 127) break;
2264                  tb &= 0xc0;
2265                  if (tb != 0 && tb != 0xc0) onechar++;
2266                  }
2267                }
2268              use_offsets[1] = start_offset + onechar;
2269              }
2270            else
2271              {
2272              if (count == PCRE_ERROR_NOMATCH)
2273                {
2274                if (gmatched == 0) fprintf(outfile, "No match\n");
2275                }
2276            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2277              break;  /* Out of the /g loop */
2278              }
2279          }          }
2280        }  
2281      }        /* If not /g or /G we are done */
2282    
2283          if (!do_g && !do_G) break;
2284    
2285          /* If we have matched an empty string, first check to see if we are at
2286          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2287          what Perl's /g option does. This turns out to be rather cunning. First
2288          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2289          same point. If this fails (picked up above) we advance to the next
2290          character. */
2291    
2292          g_notempty = 0;
2293    
2294          if (use_offsets[0] == use_offsets[1])
2295            {
2296            if (use_offsets[0] == len) break;
2297            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2298            }
2299    
2300          /* For /g, update the start offset, leaving the rest alone */
2301    
2302          if (do_g) start_offset = use_offsets[1];
2303    
2304          /* For /G, update the pointer and length */
2305    
2306          else
2307            {
2308            bptr += use_offsets[1];
2309            len -= use_offsets[1];
2310            }
2311          }  /* End of loop for /g and /G */
2312    
2313        NEXT_DATA: continue;
2314        }    /* End of loop for data lines */
2315    
2316    CONTINUE:    CONTINUE:
2317    
2318    #if !defined NOPOSIX
2319    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2320    if (re != NULL) free(re);  #endif
2321    if (extra != NULL) free(extra);  
2322      if (re != NULL) new_free(re);
2323      if (extra != NULL) new_free(extra);
2324      if (tables != NULL)
2325        {
2326        new_free((void *)tables);
2327        setlocale(LC_CTYPE, "C");
2328        locale_set = 0;
2329        }
2330    }    }
2331    
2332  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2333  return 0;  
2334    EXIT:
2335    
2336    if (infile != NULL && infile != stdin) fclose(infile);
2337    if (outfile != NULL && outfile != stdout) fclose(outfile);
2338    
2339    free(buffer);
2340    free(dbuffer);
2341    free(pbuffer);
2342    free(offsets);
2343    
2344    return yield;
2345  }  }
2346    
2347  /* End */  /* End of pcretest.c */

Legend:
Removed from v.23  
changed lines
  Added in v.169

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12