/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 7 by nigel, Sat Feb 24 21:38:09 2007 UTC revision 147 by ph10, Mon Apr 16 13:24:37 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44    #include <locale.h>
45    #include <errno.h>
46    
47    
48    /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
69    
70    /* We have to include pcre_internal.h because we need the internal info for
71    displaying the results of pcre_study() and we also need to know about the
72    internal macros, structures, and other internal data values; pcretest has
73    "inside information" compared to a program that strictly follows the PCRE API.
74    
75    Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80    #include "pcre_internal.h"
81    
82    /* We need access to the data tables that PCRE uses. So as not to have to keep
83    two copies, we include the source file here, changing the names of the external
84    symbols to prevent clashes. */
85    
86    #define _pcre_utf8_table1      utf8_table1
87    #define _pcre_utf8_table1_size utf8_table1_size
88    #define _pcre_utf8_table2      utf8_table2
89    #define _pcre_utf8_table3      utf8_table3
90    #define _pcre_utf8_table4      utf8_table4
91    #define _pcre_utt              utt
92    #define _pcre_utt_size         utt_size
93    #define _pcre_OP_lengths       OP_lengths
94    
95    #include "pcre_tables.c"
96    
97  /* Use the internal info for displaying the results of pcre_study(). */  /* We also need the pcre_printint() function for printing out compiled
98    patterns. This function is in a separate file so that it can be included in
99    pcre_compile.c when that module is compiled with debugging enabled.
100    
101  #include "internal.h"  The definition of the macro PRINTABLE, which determines whether to print an
102    output character as-is or as a hex value when showing compiled patterns, is
103    contained in this file. We use it here also, in cases when the locale has not
104    been explicitly changed, so as to get consistent output from systems that
105    differ in their output from isprint() even in the "C" locale. */
106    
107    #include "pcre_printint.src"
108    
109    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110    
111    
112    /* It is possible to compile this test program without including support for
113    testing the POSIX interface, though this is not available via the standard
114    Makefile. */
115    
116    #if !defined NOPOSIX
117  #include "pcreposix.h"  #include "pcreposix.h"
118    #endif
119    
120    /* It is also possible, for the benefit of the version currently imported into
121    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122    interface to the DFA matcher (NODFA), and without the doublecheck of the old
123    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124    UTF8 support if PCRE is built without it. */
125    
126    #ifndef SUPPORT_UTF8
127    #ifndef NOUTF8
128    #define NOUTF8
129    #endif
130    #endif
131    
132    
133    /* Other parameters */
134    
135  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
136  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 140 
140  #endif  #endif
141  #endif  #endif
142    
143    /* This is the default loop count for timing. */
144    
145    #define LOOPREPEAT 500000
146    
147    /* Static variables */
148    
149  static FILE *outfile;  static FILE *outfile;
150  static int log_store = 0;  static int log_store = 0;
151    static int callout_count;
152    static int callout_extra;
153    static int callout_fail_count;
154    static int callout_fail_id;
155    static int first_callout;
156    static int locale_set = 0;
157    static int show_malloc;
158    static int use_utf8;
159    static size_t gotten_store;
160    
161    /* The buffers grow automatically if very long input lines are encountered. */
162    
163    static int buffer_size = 50000;
164    static uschar *buffer = NULL;
165    static uschar *dbuffer = NULL;
166    static uschar *pbuffer = NULL;
167    
168    
169    
170  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
171  code as contained in pcre.c under the DEBUG macro. */  *        Read or extend an input line            *
172    *************************************************/
173    
174  static const char *OP_names[] = {  /* Input lines are read into buffer, but both patterns and data lines can be
175    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  continued over multiple input lines. In addition, if the buffer fills up, we
176    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  want to automatically expand it so as to be able to handle extremely large
177    "not",  lines that are needed for certain stress tests. When the input buffer is
178    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  expanded, the other two buffers must also be expanded likewise, and the
179    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  contents of pbuffer, which are a copy of the input for callouts, must be
180    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  preserved (for when expansion happens for a data line). This is not the most
181    "*", "*?", "+", "+?", "?", "??", "{", "{",  optimal way of handling this, but hey, this is just a test program!
182    "class", "Ref",  
183    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  Arguments:
184    "Brazero", "Braminzero", "Bra"    f            the file to read
185  };    start        where in buffer to start (this *must* be within buffer)
186    
187    Returns:       pointer to the start of new data
188  static void print_internals(pcre *re)                 could be a copy of start, or could be moved
189  {                 NULL if no data read and EOF reached
190  unsigned char *code = ((real_pcre *)re)->code;  */
   
 printf("------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   printf("%3d ", code - ((real_pcre *)re)->code);  
   
   if (*code >= OP_BRA)  
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     printf("    %s\n", OP_names[*code]);  
     printf("------------------------------------------------------------------\n");  
     return;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     printf("    \\%d", *(++code));  
     break;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
   
       code++;  
       printf("    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
191    
192          case OP_CRRANGE:  static uschar *
193          case OP_CRMINRANGE:  extend_inputline(FILE *f, uschar *start)
194          min = (code[1] << 8) + code[2];  {
195          max = (code[3] << 8) + code[4];  uschar *here = start;
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
196    
197          default:  for (;;)
198          code--;    {
199          }    int rlen = buffer_size - (here - buffer);
200    
201      if (rlen > 1000)
202        {
203        int dlen;
204        if (fgets((char *)here, rlen,  f) == NULL)
205          return (here == start)? NULL : start;
206        dlen = (int)strlen((char *)here);
207        if (dlen > 0 && here[dlen - 1] == '\n') return start;
208        here += dlen;
209        }
210    
211      else
212        {
213        int new_buffer_size = 2*buffer_size;
214        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217    
218        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219          {
220          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221          exit(1);
222        }        }
     break;  
223    
224      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
225        memcpy(new_pbuffer, pbuffer, buffer_size);
226    
227        buffer_size = new_buffer_size;
228    
229      default:      start = new_buffer + (start - buffer);
230      printf("    %s", OP_names[*code]);      here = new_buffer + (here - buffer);
231      break;  
232        free(buffer);
233        free(dbuffer);
234        free(pbuffer);
235    
236        buffer = new_buffer;
237        dbuffer = new_dbuffer;
238        pbuffer = new_pbuffer;
239      }      }
240      }
241    
242    return NULL;  /* Control never gets here */
243    }
244    
245    
246    
247    
248    
249    code++;  
250    printf("\n");  
251    /*************************************************
252    *          Read number from string               *
253    *************************************************/
254    
255    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256    around with conditional compilation, just do the job by hand. It is only used
257    for unpicking arguments, so just keep it simple.
258    
259    Arguments:
260      str           string to be converted
261      endptr        where to put the end pointer
262    
263    Returns:        the converted value, as an int
264    */
265    
266    static int
267    get_value(unsigned char *str, unsigned char **endptr)
268    {
269    int result = 0;
270    while(*str != 0 && isspace(*str)) str++;
271    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
272    *endptr = str;
273    return(result);
274    }
275    
276    
277    
278    
279    /*************************************************
280    *            Convert UTF-8 string to value       *
281    *************************************************/
282    
283    /* This function takes one or more bytes that represent a UTF-8 character,
284    and returns the value of the character.
285    
286    Argument:
287      utf8bytes   a pointer to the byte vector
288      vptr        a pointer to an int to receive the value
289    
290    Returns:      >  0 => the number of bytes consumed
291                  -6 to 0 => malformed UTF-8 character at offset = (-return)
292    */
293    
294    #if !defined NOUTF8
295    
296    static int
297    utf82ord(unsigned char *utf8bytes, int *vptr)
298    {
299    int c = *utf8bytes++;
300    int d = c;
301    int i, j, s;
302    
303    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
304      {
305      if ((d & 0x80) == 0) break;
306      d <<= 1;
307      }
308    
309    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
310    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
311    
312    /* i now has a value in the range 1-5 */
313    
314    s = 6*i;
315    d = (c & utf8_table3[i]) << s;
316    
317    for (j = 0; j < i; j++)
318      {
319      c = *utf8bytes++;
320      if ((c & 0xc0) != 0x80) return -(j+1);
321      s -= 6;
322      d |= (c & 0x3f) << s;
323    }    }
324    
325    /* Check that encoding was the correct unique one */
326    
327    for (j = 0; j < utf8_table1_size; j++)
328      if (d <= utf8_table1[j]) break;
329    if (j != i) return -(i+1);
330    
331    /* Valid value */
332    
333    *vptr = d;
334    return i+1;
335    }
336    
337    #endif
338    
339    
340    
341    /*************************************************
342    *       Convert character value to UTF-8         *
343    *************************************************/
344    
345    /* This function takes an integer value in the range 0 - 0x7fffffff
346    and encodes it as a UTF-8 character in 1 to 6 bytes.
347    
348    Arguments:
349      cvalue     the character value
350      utf8bytes  pointer to buffer for result - at least 6 bytes long
351    
352    Returns:     number of bytes placed in the buffer
353    */
354    
355    #if !defined NOUTF8
356    
357    static int
358    ord2utf8(int cvalue, uschar *utf8bytes)
359    {
360    register int i, j;
361    for (i = 0; i < utf8_table1_size; i++)
362      if (cvalue <= utf8_table1[i]) break;
363    utf8bytes += i;
364    for (j = i; j > 0; j--)
365     {
366     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367     cvalue >>= 6;
368     }
369    *utf8bytes = utf8_table2[i] | cvalue;
370    return i + 1;
371  }  }
372    
373    #endif
374    
375    
376    
377  /* Character string printing function. */  /*************************************************
378    *             Print character string             *
379    *************************************************/
380    
381  static void pchars(unsigned char *p, int length)  /* Character string printing function. Must handle UTF-8 strings in utf8
382    mode. Yields number of characters printed. If handed a NULL file, just counts
383    chars without printing. */
384    
385    static int pchars(unsigned char *p, int length, FILE *f)
386  {  {
387  int c;  int c = 0;
388    int yield = 0;
389    
390  while (length-- > 0)  while (length-- > 0)
391    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
392      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
393      if (use_utf8)
394        {
395        int rc = utf82ord(p, &c);
396    
397        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
398          {
399          length -= rc - 1;
400          p += rc;
401          if (PRINTHEX(c))
402            {
403            if (f != NULL) fprintf(f, "%c", c);
404            yield++;
405            }
406          else
407            {
408            int n = 4;
409            if (f != NULL) fprintf(f, "\\x{%02x}", c);
410            yield += (n <= 0x000000ff)? 2 :
411                     (n <= 0x00000fff)? 3 :
412                     (n <= 0x0000ffff)? 4 :
413                     (n <= 0x000fffff)? 5 : 6;
414            }
415          continue;
416          }
417        }
418    #endif
419    
420       /* Not UTF-8, or malformed UTF-8  */
421    
422      c = *p++;
423      if (PRINTHEX(c))
424        {
425        if (f != NULL) fprintf(f, "%c", c);
426        yield++;
427        }
428      else
429        {
430        if (f != NULL) fprintf(f, "\\x%02x", c);
431        yield += 4;
432        }
433      }
434    
435    return yield;
436  }  }
437    
438    
439    
440    /*************************************************
441    *              Callout function                  *
442    *************************************************/
443    
444    /* Called from PCRE as a result of the (?C) item. We print out where we are in
445    the match. Yield zero unless more callouts than the fail count, or the callout
446    data is not zero. */
447    
448    static int callout(pcre_callout_block *cb)
449    {
450    FILE *f = (first_callout | callout_extra)? outfile : NULL;
451    int i, pre_start, post_start, subject_length;
452    
453    if (callout_extra)
454      {
455      fprintf(f, "Callout %d: last capture = %d\n",
456        cb->callout_number, cb->capture_last);
457    
458      for (i = 0; i < cb->capture_top * 2; i += 2)
459        {
460        if (cb->offset_vector[i] < 0)
461          fprintf(f, "%2d: <unset>\n", i/2);
462        else
463          {
464          fprintf(f, "%2d: ", i/2);
465          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
466            cb->offset_vector[i+1] - cb->offset_vector[i], f);
467          fprintf(f, "\n");
468          }
469        }
470      }
471    
472    /* Re-print the subject in canonical form, the first time or if giving full
473    details. On subsequent calls in the same match, we use pchars just to find the
474    printed lengths of the substrings. */
475    
476    if (f != NULL) fprintf(f, "--->");
477    
478    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
479    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
480      cb->current_position - cb->start_match, f);
481    
482    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
483    
484    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
485      cb->subject_length - cb->current_position, f);
486    
487    if (f != NULL) fprintf(f, "\n");
488    
489    /* Always print appropriate indicators, with callout number if not already
490    shown. For automatic callouts, show the pattern offset. */
491    
492    if (cb->callout_number == 255)
493      {
494      fprintf(outfile, "%+3d ", cb->pattern_position);
495      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
496      }
497    else
498      {
499      if (callout_extra) fprintf(outfile, "    ");
500        else fprintf(outfile, "%3d ", cb->callout_number);
501      }
502    
503    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
504    fprintf(outfile, "^");
505    
506    if (post_start > 0)
507      {
508      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
509      fprintf(outfile, "^");
510      }
511    
512    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
513      fprintf(outfile, " ");
514    
515    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
516      pbuffer + cb->pattern_position);
517    
518    fprintf(outfile, "\n");
519    first_callout = 0;
520    
521    if (cb->callout_data != NULL)
522      {
523      int callout_data = *((int *)(cb->callout_data));
524      if (callout_data != 0)
525        {
526        fprintf(outfile, "Callout data = %d\n", callout_data);
527        return callout_data;
528        }
529      }
530    
531    return (cb->callout_number != callout_fail_id)? 0 :
532           (++callout_count >= callout_fail_count)? 1 : 0;
533    }
534    
535    
536    /*************************************************
537    *            Local malloc functions              *
538    *************************************************/
539    
540  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
541  compiled re. */  compiled re. */
542    
543  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
544  {  {
545  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
546  return malloc(size);  gotten_store = size;
547    if (show_malloc)
548      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
549    return block;
550    }
551    
552    static void new_free(void *block)
553    {
554    if (show_malloc)
555      fprintf(outfile, "free             %p\n", block);
556    free(block);
557    }
558    
559    
560    /* For recursion malloc/free, to test stacking calls */
561    
562    static void *stack_malloc(size_t size)
563    {
564    void *block = malloc(size);
565    if (show_malloc)
566      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
567    return block;
568    }
569    
570    static void stack_free(void *block)
571    {
572    if (show_malloc)
573      fprintf(outfile, "stack_free       %p\n", block);
574    free(block);
575    }
576    
577    
578    /*************************************************
579    *          Call pcre_fullinfo()                  *
580    *************************************************/
581    
582    /* Get one piece of information from the pcre_fullinfo() function */
583    
584    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
585    {
586    int rc;
587    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
588      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
589  }  }
590    
591    
592    
593    /*************************************************
594    *         Byte flipping function                 *
595    *************************************************/
596    
597    static unsigned long int
598    byteflip(unsigned long int value, int n)
599    {
600    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
601    return ((value & 0x000000ff) << 24) |
602           ((value & 0x0000ff00) <<  8) |
603           ((value & 0x00ff0000) >>  8) |
604           ((value & 0xff000000) >> 24);
605    }
606    
607    
608    
609    
610    /*************************************************
611    *        Check match or recursion limit          *
612    *************************************************/
613    
614    static int
615    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
616      int start_offset, int options, int *use_offsets, int use_size_offsets,
617      int flag, unsigned long int *limit, int errnumber, const char *msg)
618    {
619    int count;
620    int min = 0;
621    int mid = 64;
622    int max = -1;
623    
624    extra->flags |= flag;
625    
626    for (;;)
627      {
628      *limit = mid;
629    
630      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
631        use_offsets, use_size_offsets);
632    
633      if (count == errnumber)
634        {
635        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
636        min = mid;
637        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
638        }
639    
640      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
641                             count == PCRE_ERROR_PARTIAL)
642        {
643        if (mid == min + 1)
644          {
645          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
646          break;
647          }
648        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
649        max = mid;
650        mid = (min + mid)/2;
651        }
652      else break;    /* Some other error */
653      }
654    
655    extra->flags &= ~flag;
656    return count;
657    }
658    
659    
660    
661    /*************************************************
662    *         Check newline indicator                *
663    *************************************************/
664    
665    /* This is used both at compile and run-time to check for <xxx> escapes, where
666    xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
667    
668    Arguments:
669      p           points after the leading '<'
670      f           file for error message
671    
672    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
673    */
674    
675    static int
676    check_newline(uschar *p, FILE *f)
677    {
678    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
679    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
680    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
681    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
682    fprintf(f, "Unknown newline type at: <%s\n", p);
683    return 0;
684    }
685    
686    
687    
688    /*************************************************
689    *             Usage function                     *
690    *************************************************/
691    
692    static void
693    usage(void)
694    {
695    printf("Usage:     pcretest [options] [<input> [<output>]]\n");
696    printf("  -b       show compiled code (bytecode)\n");
697    printf("  -C       show PCRE compile-time options and exit\n");
698    printf("  -d       debug: show compiled code and information (-b and -i)\n");
699    #if !defined NODFA
700    printf("  -dfa     force DFA matching for all subjects\n");
701    #endif
702    printf("  -help    show usage information\n");
703    printf("  -i       show information about compiled patterns\n"
704           "  -m       output memory used information\n"
705           "  -o <n>   set size of offsets vector to <n>\n");
706    #if !defined NOPOSIX
707    printf("  -p       use POSIX interface\n");
708    #endif
709    printf("  -q       quiet: do not output PCRE version number at start\n");
710    printf("  -S <n>   set stack size to <n> megabytes\n");
711    printf("  -s       output store (memory) used information\n"
712           "  -t       time compilation and execution\n");
713    printf("  -t <n>   time compilation and execution, repeating <n> times\n");
714    printf("  -tm      time execution (matching) only\n");
715    printf("  -tm <n>  time execution (matching) only, repeating <n> times\n");
716    }
717    
718    
719    
720    /*************************************************
721    *                Main Program                    *
722    *************************************************/
723    
724  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
725  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
726  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 262  int options = 0; Line 732  int options = 0;
732  int study_options = 0;  int study_options = 0;
733  int op = 1;  int op = 1;
734  int timeit = 0;  int timeit = 0;
735    int timeitm = 0;
736  int showinfo = 0;  int showinfo = 0;
737    int showstore = 0;
738    int quiet = 0;
739    int size_offsets = 45;
740    int size_offsets_max;
741    int *offsets = NULL;
742    #if !defined NOPOSIX
743  int posix = 0;  int posix = 0;
744    #endif
745  int debug = 0;  int debug = 0;
746  unsigned char buffer[30000];  int done = 0;
747  unsigned char dbuffer[1024];  int all_use_dfa = 0;
748    int yield = 0;
749    int stack_size;
750    
751    /* These vectors store, end-to-end, a list of captured substring names. Assume
752    that 1024 is plenty long enough for the few names we'll be testing. */
753    
754    uschar copynames[1024];
755    uschar getnames[1024];
756    
757    uschar *copynamesptr;
758    uschar *getnamesptr;
759    
760  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
761    when I am debugging. They grow automatically when very long lines are read. */
762    
763    buffer = (unsigned char *)malloc(buffer_size);
764    dbuffer = (unsigned char *)malloc(buffer_size);
765    pbuffer = (unsigned char *)malloc(buffer_size);
766    
767    /* The outfile variable is static so that new_malloc can use it. */
768    
769  outfile = stdout;  outfile = stdout;
770    
771    /* The following  _setmode() stuff is some Windows magic that tells its runtime
772    library to translate CRLF into a single LF character. At least, that's what
773    I've been told: never having used Windows I take this all on trust. Originally
774    it set 0x8000, but then I was advised that _O_BINARY was better. */
775    
776    #if defined(_WIN32) || defined(WIN32)
777    _setmode( _fileno( stdout ), _O_BINARY );
778    #endif
779    
780  /* Scan options */  /* Scan options */
781    
782  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
783    {    {
784    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
785    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
786      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
787        showstore = 1;
788      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
789      else if (strcmp(argv[op], "-b") == 0) debug = 1;
790    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
791    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
792    #if !defined NODFA
793      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
794    #endif
795      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
796          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
797            *endptr == 0))
798        {
799        op++;
800        argc--;
801        }
802      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
803        {
804        int both = argv[op][2] == 0;
805        int temp;
806        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
807                         *endptr == 0))
808          {
809          timeitm = temp;
810          op++;
811          argc--;
812          }
813        else timeitm = LOOPREPEAT;
814        if (both) timeit = timeitm;
815        }
816      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
817          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
818            *endptr == 0))
819        {
820    #if defined(_WIN32) || defined(WIN32)
821        printf("PCRE: -S not supported on this OS\n");
822        exit(1);
823    #else
824        int rc;
825        struct rlimit rlim;
826        getrlimit(RLIMIT_STACK, &rlim);
827        rlim.rlim_cur = stack_size * 1024 * 1024;
828        rc = setrlimit(RLIMIT_STACK, &rlim);
829        if (rc != 0)
830          {
831        printf("PCRE: setrlimit() failed with error %d\n", rc);
832        exit(1);
833          }
834        op++;
835        argc--;
836    #endif
837        }
838    #if !defined NOPOSIX
839    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
840    #endif
841      else if (strcmp(argv[op], "-C") == 0)
842        {
843        int rc;
844        printf("PCRE version %s\n", pcre_version());
845        printf("Compiled with\n");
846        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
847        printf("  %sUTF-8 support\n", rc? "" : "No ");
848        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
849        printf("  %sUnicode properties support\n", rc? "" : "No ");
850        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
851        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
852          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
853          (rc == -1)? "ANY" : "???");
854        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
855        printf("  Internal link size = %d\n", rc);
856        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
857        printf("  POSIX malloc threshold = %d\n", rc);
858        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
859        printf("  Default match limit = %d\n", rc);
860        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
861        printf("  Default recursion depth limit = %d\n", rc);
862        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
863        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
864        goto EXIT;
865        }
866      else if (strcmp(argv[op], "-help") == 0 ||
867               strcmp(argv[op], "--help") == 0)
868        {
869        usage();
870        goto EXIT;
871        }
872    else    else
873      {      {
874      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
875      return 1;      usage();
876        yield = 1;
877        goto EXIT;
878      }      }
879    op++;    op++;
880    argc--;    argc--;
881    }    }
882    
883    /* Get the store for the offsets vector, and remember what it was */
884    
885    size_offsets_max = size_offsets;
886    offsets = (int *)malloc(size_offsets_max * sizeof(int));
887    if (offsets == NULL)
888      {
889      printf("** Failed to get %d bytes of memory for offsets vector\n",
890        size_offsets_max * sizeof(int));
891      yield = 1;
892      goto EXIT;
893      }
894    
895  /* Sort out the input and output files */  /* Sort out the input and output files */
896    
897  if (argc > 1)  if (argc > 1)
898    {    {
899    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
900    if (infile == NULL)    if (infile == NULL)
901      {      {
902      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
903      return 1;      yield = 1;
904        goto EXIT;
905      }      }
906    }    }
907    
908  if (argc > 2)  if (argc > 2)
909    {    {
910    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
911    if (outfile == NULL)    if (outfile == NULL)
912      {      {
913      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
914      return 1;      yield = 1;
915        goto EXIT;
916      }      }
917    }    }
918    
919  /* Set alternative malloc function */  /* Set alternative malloc function */
920    
921  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
922    pcre_free = new_free;
923    pcre_stack_malloc = stack_malloc;
924    pcre_stack_free = stack_free;
925    
926  /* Heading line, then prompt for first re if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
927    
928  fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
 fprintf(outfile, "PCRE version %s\n\n", pcre_version());  
929    
930  /* Main loop */  /* Main loop */
931    
932  for (;;)  while (!done)
933    {    {
934    pcre *re = NULL;    pcre *re = NULL;
935    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
936    
937    #if !defined NOPOSIX  /* There are still compilers that require no indent */
938    regex_t preg;    regex_t preg;
939      int do_posix = 0;
940    #endif
941    
942    const char *error;    const char *error;
943    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
944      unsigned char *to_file = NULL;
945      const unsigned char *tables = NULL;
946      unsigned long int true_size, true_study_size = 0;
947      size_t size, regex_gotten_store;
948    int do_study = 0;    int do_study = 0;
949    int do_debug = 0;    int do_debug = debug;
950    int do_posix = 0;    int debug_lengths = 1;
951    int erroroffset, len, delimiter;    int do_G = 0;
952      int do_g = 0;
953      int do_showinfo = showinfo;
954      int do_showrest = 0;
955      int do_flip = 0;
956      int erroroffset, len, delimiter, poffset;
957    
958      use_utf8 = 0;
959    
960    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
961    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
962    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
963      fflush(outfile);
964    
965    p = buffer;    p = buffer;
966    while (isspace(*p)) p++;    while (isspace(*p)) p++;
967    if (*p == 0) continue;    if (*p == 0) continue;
968    
969    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
970    complete, read more. */  
971      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
972        {
973        unsigned long int magic, get_options;
974        uschar sbuf[8];
975        FILE *f;
976    
977        p++;
978        pp = p + (int)strlen((char *)p);
979        while (isspace(pp[-1])) pp--;
980        *pp = 0;
981    
982        f = fopen((char *)p, "rb");
983        if (f == NULL)
984          {
985          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
986          continue;
987          }
988    
989        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
990    
991        true_size =
992          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
993        true_study_size =
994          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
995    
996        re = (real_pcre *)new_malloc(true_size);
997        regex_gotten_store = gotten_store;
998    
999        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1000    
1001        magic = ((real_pcre *)re)->magic_number;
1002        if (magic != MAGIC_NUMBER)
1003          {
1004          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1005            {
1006            do_flip = 1;
1007            }
1008          else
1009            {
1010            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1011            fclose(f);
1012            continue;
1013            }
1014          }
1015    
1016        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1017          do_flip? " (byte-inverted)" : "", p);
1018    
1019        /* Need to know if UTF-8 for printing data strings */
1020    
1021        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1022        use_utf8 = (get_options & PCRE_UTF8) != 0;
1023    
1024        /* Now see if there is any following study data */
1025    
1026        if (true_study_size != 0)
1027          {
1028          pcre_study_data *psd;
1029    
1030          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1031          extra->flags = PCRE_EXTRA_STUDY_DATA;
1032    
1033          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1034          extra->study_data = psd;
1035    
1036          if (fread(psd, 1, true_study_size, f) != true_study_size)
1037            {
1038            FAIL_READ:
1039            fprintf(outfile, "Failed to read data from %s\n", p);
1040            if (extra != NULL) new_free(extra);
1041            if (re != NULL) new_free(re);
1042            fclose(f);
1043            continue;
1044            }
1045          fprintf(outfile, "Study data loaded from %s\n", p);
1046          do_study = 1;     /* To get the data output if requested */
1047          }
1048        else fprintf(outfile, "No study data\n");
1049    
1050        fclose(f);
1051        goto SHOW_INFO;
1052        }
1053    
1054      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1055      the pattern; if it isn't complete, read more. */
1056    
1057    delimiter = *p++;    delimiter = *p++;
1058    
1059    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
1060      {      {
1061      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1062      goto SKIP_DATA;      goto SKIP_DATA;
1063      }      }
1064    
1065    pp = p;    pp = p;
1066      poffset = p - buffer;
1067    
1068    for(;;)    for(;;)
1069      {      {
1070      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
     if (*pp != 0) break;  
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
1071        {        {
1072        fprintf(outfile, "** Expression too long - missing delimiter?\n");        if (*pp == '\\' && pp[1] != 0) pp++;
1073        goto SKIP_DATA;          else if (*pp == delimiter) break;
1074          pp++;
1075        }        }
1076        if (*pp != 0) break;
1077      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1078      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1079        {        {
1080        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1081        goto END_OFF;        done = 1;
1082          goto CONTINUE;
1083        }        }
1084      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1085      }      }
1086    
1087    /* Terminate the pattern at the delimiter */    /* The buffer may have moved while being extended; reset the start of data
1088      pointer to the correct relative point in the buffer. */
1089    
1090      p = buffer + poffset;
1091    
1092      /* If the first character after the delimiter is backslash, make
1093      the pattern end with backslash. This is purely to provide a way
1094      of testing for the error message when a pattern ends with backslash. */
1095    
1096      if (pp[1] == '\\') *pp++ = '\\';
1097    
1098      /* Terminate the pattern at the delimiter, and save a copy of the pattern
1099      for callouts. */
1100    
1101    *pp++ = 0;    *pp++ = 0;
1102      strcpy((char *)pbuffer, (char *)p);
1103    
1104    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1105    
1106    options = 0;    options = 0;
1107    study_options = 0;    study_options = 0;
1108      log_store = showstore;  /* default from command line */
1109    
1110    while (*pp != 0)    while (*pp != 0)
1111      {      {
1112      switch (*pp++)      switch (*pp++)
1113        {        {
1114          case 'f': options |= PCRE_FIRSTLINE; break;
1115          case 'g': do_g = 1; break;
1116        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1117        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1118        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1119        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1120    
1121          case '+': do_showrest = 1; break;
1122        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1123        case 'D': do_debug = 1; break;        case 'B': do_debug = 1; break;
1124          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1125          case 'D': do_debug = do_showinfo = 1; break;
1126        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1127          case 'F': do_flip = 1; break;
1128          case 'G': do_G = 1; break;
1129          case 'I': do_showinfo = 1; break;
1130          case 'J': options |= PCRE_DUPNAMES; break;
1131          case 'M': log_store = 1; break;
1132          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1133    
1134    #if !defined NOPOSIX
1135        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1136    #endif
1137    
1138        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1139        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
1140        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1141        case '\n': case ' ': break;        case 'Z': debug_lengths = 0; break;
1142          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1143          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1144    
1145          case 'L':
1146          ppp = pp;
1147          /* The '\r' test here is so that it works on Windows. */
1148          /* The '0' test is just in case this is an unterminated line. */
1149          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1150          *ppp = 0;
1151          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1152            {
1153            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1154            goto SKIP_DATA;
1155            }
1156          locale_set = 1;
1157          tables = pcre_maketables();
1158          pp = ppp;
1159          break;
1160    
1161          case '>':
1162          to_file = pp;
1163          while (*pp != 0) pp++;
1164          while (isspace(pp[-1])) pp--;
1165          *pp = 0;
1166          break;
1167    
1168          case '<':
1169            {
1170            int x = check_newline(pp, outfile);
1171            if (x == 0) goto SKIP_DATA;
1172            options |= x;
1173            while (*pp++ != '>');
1174            }
1175          break;
1176    
1177          case '\r':                      /* So that it works in Windows */
1178          case '\n':
1179          case ' ':
1180          break;
1181    
1182        default:        default:
1183        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1184        goto SKIP_DATA;        goto SKIP_DATA;
1185        }        }
1186      }      }
1187    
1188    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1189    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
1190      local character tables. */
1191    
1192    #if !defined NOPOSIX
1193    if (posix || do_posix)    if (posix || do_posix)
1194      {      {
1195      int rc;      int rc;
1196      int cflags = 0;      int cflags = 0;
1197    
1198      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1199      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1200        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1201        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1202        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1203    
1204      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1205    
1206      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 423  for (;;) Line 1208  for (;;)
1208    
1209      if (rc != 0)      if (rc != 0)
1210        {        {
1211        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1212        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1213        goto SKIP_DATA;        goto SKIP_DATA;
1214        }        }
# Line 432  for (;;) Line 1217  for (;;)
1217    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1218    
1219    else    else
1220    #endif  /* !defined NOPOSIX */
1221    
1222      {      {
1223      if (timeit)      if (timeit > 0)
1224        {        {
1225        register int i;        register int i;
1226        clock_t time_taken;        clock_t time_taken;
1227        clock_t start_time = clock();        clock_t start_time = clock();
1228        for (i = 0; i < 4000; i++)        for (i = 0; i < timeit; i++)
1229          {          {
1230          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1231          if (re != NULL) free(re);          if (re != NULL) free(re);
1232          }          }
1233        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1234        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1235          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)timeit) /
1236              (double)CLOCKS_PER_SEC);
1237        }        }
1238    
1239      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1240    
1241      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1242      if non-interactive. */      if non-interactive. */
# Line 461  for (;;) Line 1249  for (;;)
1249          {          {
1250          for (;;)          for (;;)
1251            {            {
1252            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1253              goto END_OFF;              {
1254                done = 1;
1255                goto CONTINUE;
1256                }
1257            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
1258            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
1259            if (len == 0) break;            if (len == 0) break;
1260            }            }
1261          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1262          }          }
1263        continue;        goto CONTINUE;
1264        }        }
1265    
1266      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
1267        info-returning functions. The old one has a limited interface and
1268        returns only limited data. Check that it agrees with the newer one. */
1269    
1270        if (log_store)
1271          fprintf(outfile, "Memory allocation (code space): %d\n",
1272            (int)(gotten_store -
1273                  sizeof(real_pcre) -
1274                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1275    
1276      if (showinfo || do_debug)      /* Extract the size for possible writing before possibly flipping it,
1277        and remember the store that was got. */
1278    
1279        true_size = ((real_pcre *)re)->size;
1280        regex_gotten_store = gotten_store;
1281    
1282        /* If /S was present, study the regexp to generate additional info to
1283        help with the matching. */
1284    
1285        if (do_study)
1286          {
1287          if (timeit > 0)
1288            {
1289            register int i;
1290            clock_t time_taken;
1291            clock_t start_time = clock();
1292            for (i = 0; i < timeit; i++)
1293              extra = pcre_study(re, study_options, &error);
1294            time_taken = clock() - start_time;
1295            if (extra != NULL) free(extra);
1296            fprintf(outfile, "  Study time %.4f milliseconds\n",
1297              (((double)time_taken * 1000.0) / (double)timeit) /
1298                (double)CLOCKS_PER_SEC);
1299            }
1300          extra = pcre_study(re, study_options, &error);
1301          if (error != NULL)
1302            fprintf(outfile, "Failed to study: %s\n", error);
1303          else if (extra != NULL)
1304            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1305          }
1306    
1307        /* If the 'F' option was present, we flip the bytes of all the integer
1308        fields in the regex data block and the study block. This is to make it
1309        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1310        compiled on a different architecture. */
1311    
1312        if (do_flip)
1313        {        {
1314        int first_char, count;        real_pcre *rre = (real_pcre *)re;
1315          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1316          rre->size = byteflip(rre->size, sizeof(rre->size));
1317          rre->options = byteflip(rre->options, sizeof(rre->options));
1318          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1319          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1320          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1321          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1322          rre->name_table_offset = byteflip(rre->name_table_offset,
1323            sizeof(rre->name_table_offset));
1324          rre->name_entry_size = byteflip(rre->name_entry_size,
1325            sizeof(rre->name_entry_size));
1326          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1327    
1328          if (extra != NULL)
1329            {
1330            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1331            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1332            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1333            }
1334          }
1335    
1336        /* Extract information from the compiled data if required */
1337    
1338        SHOW_INFO:
1339    
1340        if (do_debug)
1341          {
1342          fprintf(outfile, "------------------------------------------------------------------\n");
1343          pcre_printint(re, outfile, debug_lengths);
1344          }
1345    
1346        if (do_showinfo)
1347          {
1348          unsigned long int get_options, all_options;
1349    #if !defined NOINFOCHECK
1350          int old_first_char, old_options, old_count;
1351    #endif
1352          int count, backrefmax, first_char, need_char;
1353          int nameentrysize, namecount;
1354          const uschar *nametable;
1355    
1356          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1357          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1358          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1359          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1360          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1361          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1362          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1363          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1364          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1365    
1366    #if !defined NOINFOCHECK
1367          old_count = pcre_info(re, &old_options, &old_first_char);
1368          if (count < 0) fprintf(outfile,
1369            "Error %d from pcre_info()\n", count);
1370          else
1371            {
1372            if (old_count != count) fprintf(outfile,
1373              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1374                old_count);
1375    
1376            if (old_first_char != first_char) fprintf(outfile,
1377              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1378                first_char, old_first_char);
1379    
1380            if (old_options != (int)get_options) fprintf(outfile,
1381              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1382                get_options, old_options);
1383            }
1384    #endif
1385    
1386          if (size != regex_gotten_store) fprintf(outfile,
1387            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1388            (int)size, (int)regex_gotten_store);
1389    
1390          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1391          if (backrefmax > 0)
1392            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1393    
1394          if (namecount > 0)
1395            {
1396            fprintf(outfile, "Named capturing subpatterns:\n");
1397            while (namecount-- > 0)
1398              {
1399              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1400                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1401                GET2(nametable, 0));
1402              nametable += nameentrysize;
1403              }
1404            }
1405    
1406          /* The NOPARTIAL bit is a private bit in the options, so we have
1407          to fish it out via our back door */
1408    
1409          all_options = ((real_pcre *)re)->options;
1410          if (do_flip)
1411            {
1412            all_options = byteflip(all_options, sizeof(all_options));
1413             }
1414    
1415          if ((all_options & PCRE_NOPARTIAL) != 0)
1416            fprintf(outfile, "Partial matching not supported\n");
1417    
1418          if (get_options == 0) fprintf(outfile, "No options\n");
1419            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1420              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1421              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1422              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1423              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1424              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1425              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1426              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1427              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1428              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1429              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1430              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1431              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1432              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1433    
1434          switch (get_options & PCRE_NEWLINE_BITS)
1435            {
1436            case PCRE_NEWLINE_CR:
1437            fprintf(outfile, "Forced newline sequence: CR\n");
1438            break;
1439    
1440            case PCRE_NEWLINE_LF:
1441            fprintf(outfile, "Forced newline sequence: LF\n");
1442            break;
1443    
1444            case PCRE_NEWLINE_CRLF:
1445            fprintf(outfile, "Forced newline sequence: CRLF\n");
1446            break;
1447    
1448            case PCRE_NEWLINE_ANY:
1449            fprintf(outfile, "Forced newline sequence: ANY\n");
1450            break;
1451    
1452            default:
1453            break;
1454            }
1455    
1456          if (first_char == -1)
1457            {
1458            fprintf(outfile, "First char at start or follows newline\n");
1459            }
1460          else if (first_char < 0)
1461            {
1462            fprintf(outfile, "No first char\n");
1463            }
1464          else
1465            {
1466            int ch = first_char & 255;
1467            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1468              "" : " (caseless)";
1469            if (PRINTHEX(ch))
1470              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1471            else
1472              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1473            }
1474    
1475          if (need_char < 0)
1476            {
1477            fprintf(outfile, "No need char\n");
1478            }
1479          else
1480            {
1481            int ch = need_char & 255;
1482            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1483              "" : " (caseless)";
1484            if (PRINTHEX(ch))
1485              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1486            else
1487              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1488            }
1489    
1490        if (debug || do_debug) print_internals(re);        /* Don't output study size; at present it is in any case a fixed
1491          value, but it varies, depending on the computer architecture, and
1492          so messes up the test suite. (And with the /F option, it might be
1493          flipped.) */
1494    
1495        count = pcre_info(re, &options, &first_char);        if (do_study)
       if (count < 0) fprintf(outfile,  
         "Error %d while reading info\n", count);  
       else  
1496          {          {
1497          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (extra == NULL)
1498          if (options == 0) fprintf(outfile, "No options\n");            fprintf(outfile, "Study returned NULL\n");
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
1499          else          else
1500            {            {
1501            if (isprint(first_char))            uschar *start_bits = NULL;
1502              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1503    
1504              if (start_bits == NULL)
1505                fprintf(outfile, "No starting byte set\n");
1506            else            else
1507              fprintf(outfile, "First char = %d\n", first_char);              {
1508                int i;
1509                int c = 24;
1510                fprintf(outfile, "Starting byte set: ");
1511                for (i = 0; i < 256; i++)
1512                  {
1513                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1514                    {
1515                    if (c > 75)
1516                      {
1517                      fprintf(outfile, "\n  ");
1518                      c = 2;
1519                      }
1520                    if (PRINTHEX(i) && i != ' ')
1521                      {
1522                      fprintf(outfile, "%c ", i);
1523                      c += 2;
1524                      }
1525                    else
1526                      {
1527                      fprintf(outfile, "\\x%02x ", i);
1528                      c += 5;
1529                      }
1530                    }
1531                  }
1532                fprintf(outfile, "\n");
1533                }
1534            }            }
1535          }          }
1536        }        }
1537    
1538      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1539      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1540        the study length, in big-endian order. */
1541    
1542      if (do_study)      if (to_file != NULL)
1543        {        {
1544        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1545          if (f == NULL)
1546          {          {
1547          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < 4000; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.2f milliseconds\n",  
           ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
1548          }          }
1549          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
1550          {          {
1551          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1552          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (true_size >> 24)  & 255;
1553            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1554            sbuf[2] = (true_size >>  8)  & 255;
1555            sbuf[3] = (true_size)  & 255;
1556    
1557            sbuf[4] = (true_study_size >> 24)  & 255;
1558            sbuf[5] = (true_study_size >> 16)  & 255;
1559            sbuf[6] = (true_study_size >>  8)  & 255;
1560            sbuf[7] = (true_study_size)  & 255;
1561    
1562            if (fwrite(sbuf, 1, 8, f) < 8 ||
1563                fwrite(re, 1, true_size, f) < true_size)
1564              {
1565              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1566              }
1567          else          else
1568            {            {
1569            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1570            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1571              {              {
1572              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1573                    true_study_size)
1574                {                {
1575                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1576                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1577                }                }
1578                else fprintf(outfile, "Study data written to %s\n", to_file);
1579    
1580              }              }
           fprintf(outfile, "\n");  
1581            }            }
1582            fclose(f);
1583          }          }
1584    
1585          new_free(re);
1586          if (extra != NULL) new_free(extra);
1587          if (tables != NULL) new_free((void *)tables);
1588          continue;  /* With next regex */
1589        }        }
1590      }      }        /* End of non-POSIX compile */
1591    
1592    /* Read data lines and test them */    /* Read data lines and test them */
1593    
1594    for (;;)    for (;;)
1595      {      {
1596      unsigned char *pp;      uschar *q;
1597        uschar *bptr;
1598        int *use_offsets = offsets;
1599        int use_size_offsets = size_offsets;
1600        int callout_data = 0;
1601        int callout_data_set = 0;
1602      int count, c;      int count, c;
1603      int offsets[30];      int copystrings = 0;
1604      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
1605        int getstrings = 0;
1606        int getlist = 0;
1607        int gmatched = 0;
1608        int start_offset = 0;
1609        int g_notempty = 0;
1610        int use_dfa = 0;
1611    
1612      options = 0;      options = 0;
1613    
1614      if (infile == stdin) printf("  data> ");      *copynames = 0;
1615      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      *getnames = 0;
1616      if (infile != stdin) fprintf(outfile, (char *)buffer);  
1617        copynamesptr = copynames;
1618        getnamesptr = getnames;
1619    
1620        pcre_callout = callout;
1621        first_callout = 1;
1622        callout_extra = 0;
1623        callout_count = 0;
1624        callout_fail_count = 999999;
1625        callout_fail_id = -1;
1626        show_malloc = 0;
1627    
1628        if (extra != NULL) extra->flags &=
1629          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1630    
1631        len = 0;
1632        for (;;)
1633          {
1634          if (infile == stdin) printf("data> ");
1635          if (extend_inputline(infile, buffer + len) == NULL)
1636            {
1637            if (len > 0) break;
1638            done = 1;
1639            goto CONTINUE;
1640            }
1641          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1642          len = (int)strlen((char *)buffer);
1643          if (buffer[len-1] == '\n') break;
1644          }
1645    
     len = (int)strlen((char *)buffer);  
1646      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1647      buffer[len] = 0;      buffer[len] = 0;
1648      if (len == 0) break;      if (len == 0) break;
# Line 600  for (;;) Line 1650  for (;;)
1650      p = buffer;      p = buffer;
1651      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1652    
1653      pp = dbuffer;      bptr = q = dbuffer;
1654      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1655        {        {
1656        int i = 0;        int i = 0;
1657        int n = 0;        int n = 0;
1658    
1659        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1660          {          {
1661          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 621  for (;;) Line 1672  for (;;)
1672          c -= '0';          c -= '0';
1673          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1674            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1675    
1676    #if !defined NOUTF8
1677            if (use_utf8 && c > 255)
1678              {
1679              unsigned char buff8[8];
1680              int ii, utn;
1681              utn = ord2utf8(c, buff8);
1682              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1683              c = buff8[ii];   /* Last byte */
1684              }
1685    #endif
1686          break;          break;
1687    
1688          case 'x':          case 'x':
1689    
1690            /* Handle \x{..} specially - new Perl thing for utf8 */
1691    
1692    #if !defined NOUTF8
1693            if (*p == '{')
1694              {
1695              unsigned char *pt = p;
1696              c = 0;
1697              while (isxdigit(*(++pt)))
1698                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1699              if (*pt == '}')
1700                {
1701                unsigned char buff8[8];
1702                int ii, utn;
1703                utn = ord2utf8(c, buff8);
1704                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1705                c = buff8[ii];   /* Last byte */
1706                p = pt + 1;
1707                break;
1708                }
1709              /* Not correct form; fall through */
1710              }
1711    #endif
1712    
1713            /* Ordinary \x */
1714    
1715          c = 0;          c = 0;
1716          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1717            {            {
# Line 632  for (;;) Line 1720  for (;;)
1720            }            }
1721          break;          break;
1722    
1723          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1724          p--;          p--;
1725          continue;          continue;
1726    
1727            case '>':
1728            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1729            continue;
1730    
1731          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1732          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1733          continue;          continue;
# Line 644  for (;;) Line 1736  for (;;)
1736          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1737          continue;          continue;
1738    
1739          case 'E':          case 'C':
1740          options |= PCRE_DOLLAR_ENDONLY;          if (isdigit(*p))    /* Set copy string */
1741              {
1742              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1743              copystrings |= 1 << n;
1744              }
1745            else if (isalnum(*p))
1746              {
1747              uschar *npp = copynamesptr;
1748              while (isalnum(*p)) *npp++ = *p++;
1749              *npp++ = 0;
1750              *npp = 0;
1751              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1752              if (n < 0)
1753                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1754              copynamesptr = npp;
1755              }
1756            else if (*p == '+')
1757              {
1758              callout_extra = 1;
1759              p++;
1760              }
1761            else if (*p == '-')
1762              {
1763              pcre_callout = NULL;
1764              p++;
1765              }
1766            else if (*p == '!')
1767              {
1768              callout_fail_id = 0;
1769              p++;
1770              while(isdigit(*p))
1771                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1772              callout_fail_count = 0;
1773              if (*p == '!')
1774                {
1775                p++;
1776                while(isdigit(*p))
1777                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1778                }
1779              }
1780            else if (*p == '*')
1781              {
1782              int sign = 1;
1783              callout_data = 0;
1784              if (*(++p) == '-') { sign = -1; p++; }
1785              while(isdigit(*p))
1786                callout_data = callout_data * 10 + *p++ - '0';
1787              callout_data *= sign;
1788              callout_data_set = 1;
1789              }
1790            continue;
1791    
1792    #if !defined NODFA
1793            case 'D':
1794    #if !defined NOPOSIX
1795            if (posix || do_posix)
1796              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1797            else
1798    #endif
1799              use_dfa = 1;
1800            continue;
1801    
1802            case 'F':
1803            options |= PCRE_DFA_SHORTEST;
1804            continue;
1805    #endif
1806    
1807            case 'G':
1808            if (isdigit(*p))
1809              {
1810              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1811              getstrings |= 1 << n;
1812              }
1813            else if (isalnum(*p))
1814              {
1815              uschar *npp = getnamesptr;
1816              while (isalnum(*p)) *npp++ = *p++;
1817              *npp++ = 0;
1818              *npp = 0;
1819              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1820              if (n < 0)
1821                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1822              getnamesptr = npp;
1823              }
1824          continue;          continue;
1825    
1826          case 'I':          case 'L':
1827          options |= PCRE_CASELESS;          getlist = 1;
1828          continue;          continue;
1829    
1830          case 'M':          case 'M':
1831          options |= PCRE_MULTILINE;          find_match_limit = 1;
1832          continue;          continue;
1833    
1834          case 'S':          case 'N':
1835          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1836          continue;          continue;
1837    
1838          case 'O':          case 'O':
1839          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1840          if (n <= (int)sizeof(offsets)/sizeof(int)) size_offsets = n;          if (n > size_offsets_max)
1841              {
1842              size_offsets_max = n;
1843              free(offsets);
1844              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1845              if (offsets == NULL)
1846                {
1847                printf("** Failed to get %d bytes of memory for offsets vector\n",
1848                  size_offsets_max * sizeof(int));
1849                yield = 1;
1850                goto EXIT;
1851                }
1852              }
1853            use_size_offsets = n;
1854            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1855            continue;
1856    
1857            case 'P':
1858            options |= PCRE_PARTIAL;
1859            continue;
1860    
1861            case 'Q':
1862            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1863            if (extra == NULL)
1864              {
1865              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1866              extra->flags = 0;
1867              }
1868            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1869            extra->match_limit_recursion = n;
1870            continue;
1871    
1872            case 'q':
1873            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1874            if (extra == NULL)
1875              {
1876              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1877              extra->flags = 0;
1878              }
1879            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1880            extra->match_limit = n;
1881            continue;
1882    
1883    #if !defined NODFA
1884            case 'R':
1885            options |= PCRE_DFA_RESTART;
1886            continue;
1887    #endif
1888    
1889            case 'S':
1890            show_malloc = 1;
1891          continue;          continue;
1892    
1893          case 'Z':          case 'Z':
1894          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1895          continue;          continue;
1896    
1897            case '?':
1898            options |= PCRE_NO_UTF8_CHECK;
1899            continue;
1900    
1901            case '<':
1902              {
1903              int x = check_newline(p, outfile);
1904              if (x == 0) goto NEXT_DATA;
1905              options |= x;
1906              while (*p++ != '>');
1907              }
1908            continue;
1909          }          }
1910        *pp++ = c;        *q++ = c;
1911          }
1912        *q = 0;
1913        len = q - dbuffer;
1914    
1915        if ((all_use_dfa || use_dfa) && find_match_limit)
1916          {
1917          printf("**Match limit not relevant for DFA matching: ignored\n");
1918          find_match_limit = 0;
1919        }        }
     *pp = 0;  
     len = pp - dbuffer;  
1920    
1921      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1922      support timing. */      support timing or playing with the match limit or callout data. */
1923    
1924    #if !defined NOPOSIX
1925      if (posix || do_posix)      if (posix || do_posix)
1926        {        {
1927        int rc;        int rc;
1928        int eflags = 0;        int eflags = 0;
1929        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1930          if (use_size_offsets > 0)
1931            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1932        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1933        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1934    
1935        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1936    
1937        if (rc != 0)        if (rc != 0)
1938          {          {
1939          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1940          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1941          }          }
1942          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1943                  != 0)
1944            {
1945            fprintf(outfile, "Matched with REG_NOSUB\n");
1946            }
1947        else        else
1948          {          {
1949          size_t i;          size_t i;
1950          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1951            {            {
1952            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1953              {              {
1954              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
1955              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1956                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1957              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1958                if (i == 0 && do_showrest)
1959                  {
1960                  fprintf(outfile, " 0+ ");
1961                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1962                    outfile);
1963                  fprintf(outfile, "\n");
1964                  }
1965              }              }
1966            }            }
1967          }          }
1968          free(pmatch);
1969        }        }
1970    
1971      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1972    
1973      else      else
1974    #endif  /* !defined NOPOSIX */
1975    
1976        for (;; gmatched++)    /* Loop for /g or /G */
1977        {        {
1978        if (timeit)        if (timeitm > 0)
1979          {          {
1980          register int i;          register int i;
1981          clock_t time_taken;          clock_t time_taken;
1982          clock_t start_time = clock();          clock_t start_time = clock();
1983          for (i = 0; i < 4000; i++)  
1984            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
1985              size_offsets);          if (all_use_dfa || use_dfa)
1986              {
1987              int workspace[1000];
1988              for (i = 0; i < timeitm; i++)
1989                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1990                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1991                  sizeof(workspace)/sizeof(int));
1992              }
1993            else
1994    #endif
1995    
1996            for (i = 0; i < timeitm; i++)
1997              count = pcre_exec(re, extra, (char *)bptr, len,
1998                start_offset, options | g_notempty, use_offsets, use_size_offsets);
1999    
2000          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2001          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2002            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeitm) /
2003                (double)CLOCKS_PER_SEC);
2004            }
2005    
2006          /* If find_match_limit is set, we want to do repeated matches with
2007          varying limits in order to find the minimum value for the match limit and
2008          for the recursion limit. */
2009    
2010          if (find_match_limit)
2011            {
2012            if (extra == NULL)
2013              {
2014              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2015              extra->flags = 0;
2016              }
2017    
2018            (void)check_match_limit(re, extra, bptr, len, start_offset,
2019              options|g_notempty, use_offsets, use_size_offsets,
2020              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2021              PCRE_ERROR_MATCHLIMIT, "match()");
2022    
2023            count = check_match_limit(re, extra, bptr, len, start_offset,
2024              options|g_notempty, use_offsets, use_size_offsets,
2025              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2026              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2027            }
2028    
2029          /* If callout_data is set, use the interface with additional data */
2030    
2031          else if (callout_data_set)
2032            {
2033            if (extra == NULL)
2034              {
2035              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2036              extra->flags = 0;
2037              }
2038            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2039            extra->callout_data = &callout_data;
2040            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2041              options | g_notempty, use_offsets, use_size_offsets);
2042            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2043          }          }
2044    
2045        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
2046          size_offsets);        value of match_limit. */
2047    
2048    #if !defined NODFA
2049          else if (all_use_dfa || use_dfa)
2050            {
2051            int workspace[1000];
2052            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2053              options | g_notempty, use_offsets, use_size_offsets, workspace,
2054              sizeof(workspace)/sizeof(int));
2055            if (count == 0)
2056              {
2057              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2058              count = use_size_offsets/2;
2059              }
2060            }
2061    #endif
2062    
2063        if (count == 0)        else
2064          {          {
2065          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2066          count = size_offsets/2;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2067            if (count == 0)
2068              {
2069              fprintf(outfile, "Matched, but too many substrings\n");
2070              count = use_size_offsets/3;
2071              }
2072          }          }
2073    
2074          /* Matched */
2075    
2076        if (count >= 0)        if (count >= 0)
2077          {          {
2078          int i;          int i, maxcount;
2079          count *= 2;  
2080          for (i = 0; i < count; i += 2)  #if !defined NODFA
2081            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2082    #endif
2083              maxcount = use_size_offsets/3;
2084    
2085            /* This is a check against a lunatic return value. */
2086    
2087            if (count > maxcount)
2088              {
2089              fprintf(outfile,
2090                "** PCRE error: returned count %d is too big for offset size %d\n",
2091                count, use_size_offsets);
2092              count = use_size_offsets/3;
2093              if (do_g || do_G)
2094                {
2095                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2096                do_g = do_G = FALSE;        /* Break g/G loop */
2097                }
2098              }
2099    
2100            for (i = 0; i < count * 2; i += 2)
2101            {            {
2102            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2103              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2104            else            else
2105              {              {
2106              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2107              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2108                  use_offsets[i+1] - use_offsets[i], outfile);
2109              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2110                if (i == 0)
2111                  {
2112                  if (do_showrest)
2113                    {
2114                    fprintf(outfile, " 0+ ");
2115                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2116                      outfile);
2117                    fprintf(outfile, "\n");
2118                    }
2119                  }
2120                }
2121              }
2122    
2123            for (i = 0; i < 32; i++)
2124              {
2125              if ((copystrings & (1 << i)) != 0)
2126                {
2127                char copybuffer[256];
2128                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2129                  i, copybuffer, sizeof(copybuffer));
2130                if (rc < 0)
2131                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2132                else
2133                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2134                }
2135              }
2136    
2137            for (copynamesptr = copynames;
2138                 *copynamesptr != 0;
2139                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2140              {
2141              char copybuffer[256];
2142              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2143                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2144              if (rc < 0)
2145                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2146              else
2147                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2148              }
2149    
2150            for (i = 0; i < 32; i++)
2151              {
2152              if ((getstrings & (1 << i)) != 0)
2153                {
2154                const char *substring;
2155                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2156                  i, &substring);
2157                if (rc < 0)
2158                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2159                else
2160                  {
2161                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2162                  pcre_free_substring(substring);
2163                  }
2164                }
2165              }
2166    
2167            for (getnamesptr = getnames;
2168                 *getnamesptr != 0;
2169                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2170              {
2171              const char *substring;
2172              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2173                count, (char *)getnamesptr, &substring);
2174              if (rc < 0)
2175                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2176              else
2177                {
2178                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2179                pcre_free_substring(substring);
2180              }              }
2181            }            }
2182    
2183            if (getlist)
2184              {
2185              const char **stringlist;
2186              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2187                &stringlist);
2188              if (rc < 0)
2189                fprintf(outfile, "get substring list failed %d\n", rc);
2190              else
2191                {
2192                for (i = 0; i < count; i++)
2193                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2194                if (stringlist[i] != NULL)
2195                  fprintf(outfile, "string list not terminated by NULL\n");
2196                /* free((void *)stringlist); */
2197                pcre_free_substring_list(stringlist);
2198                }
2199              }
2200            }
2201    
2202          /* There was a partial match */
2203    
2204          else if (count == PCRE_ERROR_PARTIAL)
2205            {
2206            fprintf(outfile, "Partial match");
2207    #if !defined NODFA
2208            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2209              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2210                bptr + use_offsets[0]);
2211    #endif
2212            fprintf(outfile, "\n");
2213            break;  /* Out of the /g loop */
2214          }          }
2215    
2216          /* Failed to match. If this is a /g or /G loop and we previously set
2217          g_notempty after a null match, this is not necessarily the end. We want
2218          to advance the start offset, and continue. We won't be at the end of the
2219          string - that was checked before setting g_notempty.
2220    
2221          Complication arises in the case when the newline option is "any".
2222          If the previous match was at the end of a line terminated by CRLF, an
2223          advance of one character just passes the \r, whereas we should prefer the
2224          longer newline sequence, as does the code in pcre_exec(). Fudge the
2225          offset value to achieve this.
2226    
2227          Otherwise, in the case of UTF-8 matching, the advance must be one
2228          character, not one byte. */
2229    
2230        else        else
2231          {          {
2232          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2233              {
2234              int onechar = 1;
2235              unsigned int obits = ((real_pcre *)re)->options;
2236              use_offsets[0] = start_offset;
2237              if ((obits & PCRE_NEWLINE_BITS) == 0)
2238                {
2239                int d;
2240                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2241                obits = (d == '\r')? PCRE_NEWLINE_CR :
2242                        (d == '\n')? PCRE_NEWLINE_LF :
2243                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2244                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2245                }
2246              if ((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY &&
2247                  start_offset < len - 1 &&
2248                  bptr[start_offset] == '\r' &&
2249                  bptr[start_offset+1] == '\n')
2250                onechar++;
2251              else if (use_utf8)
2252                {
2253                while (start_offset + onechar < len)
2254                  {
2255                  int tb = bptr[start_offset+onechar];
2256                  if (tb <= 127) break;
2257                  tb &= 0xc0;
2258                  if (tb != 0 && tb != 0xc0) onechar++;
2259                  }
2260                }
2261              use_offsets[1] = start_offset + onechar;
2262              }
2263            else
2264              {
2265              if (count == PCRE_ERROR_NOMATCH)
2266                {
2267                if (gmatched == 0) fprintf(outfile, "No match\n");
2268                }
2269            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2270              break;  /* Out of the /g loop */
2271              }
2272          }          }
       }  
     }  
2273    
2274          /* If not /g or /G we are done */
2275    
2276          if (!do_g && !do_G) break;
2277    
2278          /* If we have matched an empty string, first check to see if we are at
2279          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2280          what Perl's /g option does. This turns out to be rather cunning. First
2281          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2282          same point. If this fails (picked up above) we advance to the next
2283          character. */
2284    
2285          g_notempty = 0;
2286    
2287          if (use_offsets[0] == use_offsets[1])
2288            {
2289            if (use_offsets[0] == len) break;
2290            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2291            }
2292    
2293          /* For /g, update the start offset, leaving the rest alone */
2294    
2295          if (do_g) start_offset = use_offsets[1];
2296    
2297          /* For /G, update the pointer and length */
2298    
2299          else
2300            {
2301            bptr += use_offsets[1];
2302            len -= use_offsets[1];
2303            }
2304          }  /* End of loop for /g and /G */
2305    
2306        NEXT_DATA: continue;
2307        }    /* End of loop for data lines */
2308    
2309      CONTINUE:
2310    
2311    #if !defined NOPOSIX
2312    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2313    if (re != NULL) free(re);  #endif
2314    if (extra != NULL) free(extra);  
2315      if (re != NULL) new_free(re);
2316      if (extra != NULL) new_free(extra);
2317      if (tables != NULL)
2318        {
2319        new_free((void *)tables);
2320        setlocale(LC_CTYPE, "C");
2321        locale_set = 0;
2322        }
2323    }    }
2324    
2325  END_OFF:  if (infile == stdin) fprintf(outfile, "\n");
2326  fprintf(outfile, "\n");  
2327  return 0;  EXIT:
2328    
2329    if (infile != NULL && infile != stdin) fclose(infile);
2330    if (outfile != NULL && outfile != stdout) fclose(outfile);
2331    
2332    free(buffer);
2333    free(dbuffer);
2334    free(pbuffer);
2335    free(offsets);
2336    
2337    return yield;
2338  }  }
2339    
2340  /* End */  /* End of pcretest.c */

Legend:
Removed from v.7  
changed lines
  Added in v.147

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12