/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 13 by nigel, Sat Feb 24 21:38:21 2007 UTC revision 345 by ph10, Mon Apr 28 15:10:02 2008 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48    #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77    #else
78    #include <sys/time.h>          /* These two includes are needed */
79    #include <sys/resource.h>      /* for setrlimit(). */
80    #define INPUT_MODE   "rb"
81    #define OUTPUT_MODE  "wb"
82    #endif
83    
84    
85  /* Use the internal info for displaying the results of pcre_study(). */  /* We have to include pcre_internal.h because we need the internal info for
86    displaying the results of pcre_study() and we also need to know about the
87    internal macros, structures, and other internal data values; pcretest has
88    "inside information" compared to a program that strictly follows the PCRE API.
89    
90    Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95    #include "pcre_internal.h"
96    
97    /* We need access to the data tables that PCRE uses. So as not to have to keep
98    two copies, we include the source file here, changing the names of the external
99    symbols to prevent clashes. */
100    
101    #define _pcre_utf8_table1      utf8_table1
102    #define _pcre_utf8_table1_size utf8_table1_size
103    #define _pcre_utf8_table2      utf8_table2
104    #define _pcre_utf8_table3      utf8_table3
105    #define _pcre_utf8_table4      utf8_table4
106    #define _pcre_utt              utt
107    #define _pcre_utt_size         utt_size
108    #define _pcre_utt_names        utt_names
109    #define _pcre_OP_lengths       OP_lengths
110    
111    #include "pcre_tables.c"
112    
113    /* We also need the pcre_printint() function for printing out compiled
114    patterns. This function is in a separate file so that it can be included in
115    pcre_compile.c when that module is compiled with debugging enabled.
116    
117    The definition of the macro PRINTABLE, which determines whether to print an
118    output character as-is or as a hex value when showing compiled patterns, is
119    contained in this file. We use it here also, in cases when the locale has not
120    been explicitly changed, so as to get consistent output from systems that
121    differ in their output from isprint() even in the "C" locale. */
122    
123    #include "pcre_printint.src"
124    
125    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
126    
127    
128    /* It is possible to compile this test program without including support for
129    testing the POSIX interface, though this is not available via the standard
130    Makefile. */
131    
132  #include "internal.h"  #if !defined NOPOSIX
133  #include "pcreposix.h"  #include "pcreposix.h"
134    #endif
135    
136    /* It is also possible, for the benefit of the version currently imported into
137    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
138    interface to the DFA matcher (NODFA), and without the doublecheck of the old
139    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
140    UTF8 support if PCRE is built without it. */
141    
142    #ifndef SUPPORT_UTF8
143    #ifndef NOUTF8
144    #define NOUTF8
145    #endif
146    #endif
147    
148    
149    /* Other parameters */
150    
151  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
152  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 156 
156  #endif  #endif
157  #endif  #endif
158    
159    /* This is the default loop count for timing. */
160    
161    #define LOOPREPEAT 500000
162    
163    /* Static variables */
164    
165  static FILE *outfile;  static FILE *outfile;
166  static int log_store = 0;  static int log_store = 0;
167    static int callout_count;
168    static int callout_extra;
169    static int callout_fail_count;
170    static int callout_fail_id;
171    static int debug_lengths;
172    static int first_callout;
173    static int locale_set = 0;
174    static int show_malloc;
175    static int use_utf8;
176    static size_t gotten_store;
177    
178    /* The buffers grow automatically if very long input lines are encountered. */
179    
180    static int buffer_size = 50000;
181    static uschar *buffer = NULL;
182    static uschar *dbuffer = NULL;
183    static uschar *pbuffer = NULL;
184    
185    
186    
187  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
188  code as contained in pcre.c under the DEBUG macro. */  *        Read or extend an input line            *
189    *************************************************/
190    
191  static const char *OP_names[] = {  /* Input lines are read into buffer, but both patterns and data lines can be
192    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  continued over multiple input lines. In addition, if the buffer fills up, we
193    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  want to automatically expand it so as to be able to handle extremely large
194    "not",  lines that are needed for certain stress tests. When the input buffer is
195    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  expanded, the other two buffers must also be expanded likewise, and the
196    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  contents of pbuffer, which are a copy of the input for callouts, must be
197    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  preserved (for when expansion happens for a data line). This is not the most
198    "*", "*?", "+", "+?", "?", "??", "{", "{",  optimal way of handling this, but hey, this is just a test program!
199    "class", "negclass", "Ref",  
200    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  Arguments:
201    "Brazero", "Braminzero", "Bra"    f            the file to read
202  };    start        where in buffer to start (this *must* be within buffer)
203      prompt       for stdin or readline()
204    
205  static void print_internals(pcre *re)  Returns:       pointer to the start of new data
206  {                 could be a copy of start, or could be moved
207  unsigned char *code = ((real_pcre *)re)->code;                 NULL if no data read and EOF reached
208    */
 printf("------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   printf("%3d ", code - ((real_pcre *)re)->code);  
   
   if (*code >= OP_BRA)  
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     printf("    %s\n", OP_names[*code]);  
     printf("------------------------------------------------------------------\n");  
     return;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf("0,");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     printf("    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
     case OP_NEGCLASS:  
       {  
       int i, min, max;  
       if (*code++ == OP_CLASS) printf("    [");  
         else printf("   ^[");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
209    
210          case OP_CRRANGE:  static uschar *
211          case OP_CRMINRANGE:  extend_inputline(FILE *f, uschar *start, const char *prompt)
212          min = (code[1] << 8) + code[2];  {
213          max = (code[3] << 8) + code[4];  uschar *here = start;
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
214    
215          default:  for (;;)
216          code--;    {
217          }    int rlen = buffer_size - (here - buffer);
218    
219      if (rlen > 1000)
220        {
221        int dlen;
222    
223        /* If libreadline support is required, use readline() to read a line if the
224        input is a terminal. Note that readline() removes the trailing newline, so
225        we must put it back again, to be compatible with fgets(). */
226    
227    #ifdef SUPPORT_LIBREADLINE
228        if (isatty(fileno(f)))
229          {
230          size_t len;
231          char *s = readline(prompt);
232          if (s == NULL) return (here == start)? NULL : start;
233          len = strlen(s);
234          if (len > 0) add_history(s);
235          if (len > rlen - 1) len = rlen - 1;
236          memcpy(here, s, len);
237          here[len] = '\n';
238          here[len+1] = 0;
239          free(s);
240        }        }
241      break;      else
242    #endif
243    
244        /* Read the next line by normal means, prompting if the file is stdin. */
245    
246      /* Anything else is just a one-node item */        {
247          if (f == stdin) printf(prompt);
248          if (fgets((char *)here, rlen,  f) == NULL)
249            return (here == start)? NULL : start;
250          }
251    
252      default:      dlen = (int)strlen((char *)here);
253      printf("    %s", OP_names[*code]);      if (dlen > 0 && here[dlen - 1] == '\n') return start;
254      break;      here += dlen;
255      }      }
256    
257    code++;    else
258    printf("\n");      {
259        int new_buffer_size = 2*buffer_size;
260        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
261        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
262        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
263    
264        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
265          {
266          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
267          exit(1);
268          }
269    
270        memcpy(new_buffer, buffer, buffer_size);
271        memcpy(new_pbuffer, pbuffer, buffer_size);
272    
273        buffer_size = new_buffer_size;
274    
275        start = new_buffer + (start - buffer);
276        here = new_buffer + (here - buffer);
277    
278        free(buffer);
279        free(dbuffer);
280        free(pbuffer);
281    
282        buffer = new_buffer;
283        dbuffer = new_dbuffer;
284        pbuffer = new_pbuffer;
285        }
286      }
287    
288    return NULL;  /* Control never gets here */
289    }
290    
291    
292    
293    
294    
295    
296    
297    /*************************************************
298    *          Read number from string               *
299    *************************************************/
300    
301    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
302    around with conditional compilation, just do the job by hand. It is only used
303    for unpicking arguments, so just keep it simple.
304    
305    Arguments:
306      str           string to be converted
307      endptr        where to put the end pointer
308    
309    Returns:        the converted value, as an int
310    */
311    
312    static int
313    get_value(unsigned char *str, unsigned char **endptr)
314    {
315    int result = 0;
316    while(*str != 0 && isspace(*str)) str++;
317    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
318    *endptr = str;
319    return(result);
320    }
321    
322    
323    
324    
325    /*************************************************
326    *            Convert UTF-8 string to value       *
327    *************************************************/
328    
329    /* This function takes one or more bytes that represent a UTF-8 character,
330    and returns the value of the character.
331    
332    Argument:
333      utf8bytes   a pointer to the byte vector
334      vptr        a pointer to an int to receive the value
335    
336    Returns:      >  0 => the number of bytes consumed
337                  -6 to 0 => malformed UTF-8 character at offset = (-return)
338    */
339    
340    #if !defined NOUTF8
341    
342    static int
343    utf82ord(unsigned char *utf8bytes, int *vptr)
344    {
345    int c = *utf8bytes++;
346    int d = c;
347    int i, j, s;
348    
349    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
350      {
351      if ((d & 0x80) == 0) break;
352      d <<= 1;
353    }    }
354    
355    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
356    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
357    
358    /* i now has a value in the range 1-5 */
359    
360    s = 6*i;
361    d = (c & utf8_table3[i]) << s;
362    
363    for (j = 0; j < i; j++)
364      {
365      c = *utf8bytes++;
366      if ((c & 0xc0) != 0x80) return -(j+1);
367      s -= 6;
368      d |= (c & 0x3f) << s;
369      }
370    
371    /* Check that encoding was the correct unique one */
372    
373    for (j = 0; j < utf8_table1_size; j++)
374      if (d <= utf8_table1[j]) break;
375    if (j != i) return -(i+1);
376    
377    /* Valid value */
378    
379    *vptr = d;
380    return i+1;
381  }  }
382    
383    #endif
384    
385    
386    
387    /*************************************************
388    *       Convert character value to UTF-8         *
389    *************************************************/
390    
391    /* This function takes an integer value in the range 0 - 0x7fffffff
392    and encodes it as a UTF-8 character in 1 to 6 bytes.
393    
394  /* Character string printing function. */  Arguments:
395      cvalue     the character value
396      utf8bytes  pointer to buffer for result - at least 6 bytes long
397    
398  static void pchars(unsigned char *p, int length)  Returns:     number of characters placed in the buffer
399    */
400    
401    #if !defined NOUTF8
402    
403    static int
404    ord2utf8(int cvalue, uschar *utf8bytes)
405  {  {
406  int c;  register int i, j;
407    for (i = 0; i < utf8_table1_size; i++)
408      if (cvalue <= utf8_table1[i]) break;
409    utf8bytes += i;
410    for (j = i; j > 0; j--)
411     {
412     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
413     cvalue >>= 6;
414     }
415    *utf8bytes = utf8_table2[i] | cvalue;
416    return i + 1;
417    }
418    
419    #endif
420    
421    
422    
423    /*************************************************
424    *             Print character string             *
425    *************************************************/
426    
427    /* Character string printing function. Must handle UTF-8 strings in utf8
428    mode. Yields number of characters printed. If handed a NULL file, just counts
429    chars without printing. */
430    
431    static int pchars(unsigned char *p, int length, FILE *f)
432    {
433    int c = 0;
434    int yield = 0;
435    
436  while (length-- > 0)  while (length-- > 0)
437    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
438      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
439      if (use_utf8)
440        {
441        int rc = utf82ord(p, &c);
442    
443        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
444          {
445          length -= rc - 1;
446          p += rc;
447          if (PRINTHEX(c))
448            {
449            if (f != NULL) fprintf(f, "%c", c);
450            yield++;
451            }
452          else
453            {
454            int n = 4;
455            if (f != NULL) fprintf(f, "\\x{%02x}", c);
456            yield += (n <= 0x000000ff)? 2 :
457                     (n <= 0x00000fff)? 3 :
458                     (n <= 0x0000ffff)? 4 :
459                     (n <= 0x000fffff)? 5 : 6;
460            }
461          continue;
462          }
463        }
464    #endif
465    
466       /* Not UTF-8, or malformed UTF-8  */
467    
468      c = *p++;
469      if (PRINTHEX(c))
470        {
471        if (f != NULL) fprintf(f, "%c", c);
472        yield++;
473        }
474      else
475        {
476        if (f != NULL) fprintf(f, "\\x%02x", c);
477        yield += 4;
478        }
479      }
480    
481    return yield;
482    }
483    
484    
485    
486    /*************************************************
487    *              Callout function                  *
488    *************************************************/
489    
490    /* Called from PCRE as a result of the (?C) item. We print out where we are in
491    the match. Yield zero unless more callouts than the fail count, or the callout
492    data is not zero. */
493    
494    static int callout(pcre_callout_block *cb)
495    {
496    FILE *f = (first_callout | callout_extra)? outfile : NULL;
497    int i, pre_start, post_start, subject_length;
498    
499    if (callout_extra)
500      {
501      fprintf(f, "Callout %d: last capture = %d\n",
502        cb->callout_number, cb->capture_last);
503    
504      for (i = 0; i < cb->capture_top * 2; i += 2)
505        {
506        if (cb->offset_vector[i] < 0)
507          fprintf(f, "%2d: <unset>\n", i/2);
508        else
509          {
510          fprintf(f, "%2d: ", i/2);
511          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
512            cb->offset_vector[i+1] - cb->offset_vector[i], f);
513          fprintf(f, "\n");
514          }
515        }
516      }
517    
518    /* Re-print the subject in canonical form, the first time or if giving full
519    details. On subsequent calls in the same match, we use pchars just to find the
520    printed lengths of the substrings. */
521    
522    if (f != NULL) fprintf(f, "--->");
523    
524    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
525    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
526      cb->current_position - cb->start_match, f);
527    
528    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
529    
530    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
531      cb->subject_length - cb->current_position, f);
532    
533    if (f != NULL) fprintf(f, "\n");
534    
535    /* Always print appropriate indicators, with callout number if not already
536    shown. For automatic callouts, show the pattern offset. */
537    
538    if (cb->callout_number == 255)
539      {
540      fprintf(outfile, "%+3d ", cb->pattern_position);
541      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
542      }
543    else
544      {
545      if (callout_extra) fprintf(outfile, "    ");
546        else fprintf(outfile, "%3d ", cb->callout_number);
547      }
548    
549    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
550    fprintf(outfile, "^");
551    
552    if (post_start > 0)
553      {
554      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
555      fprintf(outfile, "^");
556      }
557    
558    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
559      fprintf(outfile, " ");
560    
561    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
562      pbuffer + cb->pattern_position);
563    
564    fprintf(outfile, "\n");
565    first_callout = 0;
566    
567    if (cb->callout_data != NULL)
568      {
569      int callout_data = *((int *)(cb->callout_data));
570      if (callout_data != 0)
571        {
572        fprintf(outfile, "Callout data = %d\n", callout_data);
573        return callout_data;
574        }
575      }
576    
577    return (cb->callout_number != callout_fail_id)? 0 :
578           (++callout_count >= callout_fail_count)? 1 : 0;
579  }  }
580    
581    
582    /*************************************************
583    *            Local malloc functions              *
584    *************************************************/
585    
586  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
587  compiled re. */  compiled re. */
588    
589  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
590  {  {
591  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
592  return malloc(size);  gotten_store = size;
593    if (show_malloc)
594      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
595    return block;
596    }
597    
598    static void new_free(void *block)
599    {
600    if (show_malloc)
601      fprintf(outfile, "free             %p\n", block);
602    free(block);
603    }
604    
605    
606    /* For recursion malloc/free, to test stacking calls */
607    
608    static void *stack_malloc(size_t size)
609    {
610    void *block = malloc(size);
611    if (show_malloc)
612      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
613    return block;
614    }
615    
616    static void stack_free(void *block)
617    {
618    if (show_malloc)
619      fprintf(outfile, "stack_free       %p\n", block);
620    free(block);
621    }
622    
623    
624    /*************************************************
625    *          Call pcre_fullinfo()                  *
626    *************************************************/
627    
628    /* Get one piece of information from the pcre_fullinfo() function */
629    
630    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
631    {
632    int rc;
633    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
634      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
635    }
636    
637    
638    
639    /*************************************************
640    *         Byte flipping function                 *
641    *************************************************/
642    
643    static unsigned long int
644    byteflip(unsigned long int value, int n)
645    {
646    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
647    return ((value & 0x000000ff) << 24) |
648           ((value & 0x0000ff00) <<  8) |
649           ((value & 0x00ff0000) >>  8) |
650           ((value & 0xff000000) >> 24);
651    }
652    
653    
654    
655    
656    /*************************************************
657    *        Check match or recursion limit          *
658    *************************************************/
659    
660    static int
661    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
662      int start_offset, int options, int *use_offsets, int use_size_offsets,
663      int flag, unsigned long int *limit, int errnumber, const char *msg)
664    {
665    int count;
666    int min = 0;
667    int mid = 64;
668    int max = -1;
669    
670    extra->flags |= flag;
671    
672    for (;;)
673      {
674      *limit = mid;
675    
676      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
677        use_offsets, use_size_offsets);
678    
679      if (count == errnumber)
680        {
681        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
682        min = mid;
683        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
684        }
685    
686      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
687                             count == PCRE_ERROR_PARTIAL)
688        {
689        if (mid == min + 1)
690          {
691          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
692          break;
693          }
694        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
695        max = mid;
696        mid = (min + mid)/2;
697        }
698      else break;    /* Some other error */
699      }
700    
701    extra->flags &= ~flag;
702    return count;
703    }
704    
705    
706    
707    /*************************************************
708    *         Case-independent strncmp() function    *
709    *************************************************/
710    
711    /*
712    Arguments:
713      s         first string
714      t         second string
715      n         number of characters to compare
716    
717    Returns:    < 0, = 0, or > 0, according to the comparison
718    */
719    
720    static int
721    strncmpic(uschar *s, uschar *t, int n)
722    {
723    while (n--)
724      {
725      int c = tolower(*s++) - tolower(*t++);
726      if (c) return c;
727      }
728    return 0;
729    }
730    
731    
732    
733    /*************************************************
734    *         Check newline indicator                *
735    *************************************************/
736    
737    /* This is used both at compile and run-time to check for <xxx> escapes, where
738    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
739    no match.
740    
741    Arguments:
742      p           points after the leading '<'
743      f           file for error message
744    
745    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
746    */
747    
748    static int
749    check_newline(uschar *p, FILE *f)
750    {
751    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
752    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
753    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
754    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
755    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
756    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
757    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
758    fprintf(f, "Unknown newline type at: <%s\n", p);
759    return 0;
760  }  }
761    
762    
763    
/*************************************************
*             Usage function                     *
*************************************************/

/* Writes the command-line summary to stdout. Lines that describe optional
features (readline, DFA matching, the POSIX interface) are selected at compile
time, so each conditional section is output by its own call. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
800    
801    
802    
803    /*************************************************
804    *                Main Program                    *
805    *************************************************/
806    
807  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
808  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
809  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 265  int options = 0; Line 815  int options = 0;
815  int study_options = 0;  int study_options = 0;
816  int op = 1;  int op = 1;
817  int timeit = 0;  int timeit = 0;
818    int timeitm = 0;
819  int showinfo = 0;  int showinfo = 0;
820    int showstore = 0;
821    int quiet = 0;
822    int size_offsets = 45;
823    int size_offsets_max;
824    int *offsets = NULL;
825    #if !defined NOPOSIX
826  int posix = 0;  int posix = 0;
827    #endif
828  int debug = 0;  int debug = 0;
829  int done = 0;  int done = 0;
830  unsigned char buffer[30000];  int all_use_dfa = 0;
831  unsigned char dbuffer[1024];  int yield = 0;
832    int stack_size;
833    
834    /* These vectors store, end-to-end, a list of captured substring names. Assume
835    that 1024 is plenty long enough for the few names we'll be testing. */
836    
837    uschar copynames[1024];
838    uschar getnames[1024];
839    
840    uschar *copynamesptr;
841    uschar *getnamesptr;
842    
843  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
844    when I am debugging. They grow automatically when very long lines are read. */
845    
846    buffer = (unsigned char *)malloc(buffer_size);
847    dbuffer = (unsigned char *)malloc(buffer_size);
848    pbuffer = (unsigned char *)malloc(buffer_size);
849    
850    /* The outfile variable is static so that new_malloc can use it. */
851    
852  outfile = stdout;  outfile = stdout;
853    
854    /* The following  _setmode() stuff is some Windows magic that tells its runtime
855    library to translate CRLF into a single LF character. At least, that's what
856    I've been told: never having used Windows I take this all on trust. Originally
857    it set 0x8000, but then I was advised that _O_BINARY was better. */
858    
859    #if defined(_WIN32) || defined(WIN32)
860    _setmode( _fileno( stdout ), _O_BINARY );
861    #endif
862    
863  /* Scan options */  /* Scan options */
864    
865  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
866    {    {
867    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
868    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
869      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
870        showstore = 1;
871      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
872      else if (strcmp(argv[op], "-b") == 0) debug = 1;
873    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
874    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
875    #if !defined NODFA
876      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
877    #endif
878      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
879          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
880            *endptr == 0))
881        {
882        op++;
883        argc--;
884        }
885      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
886        {
887        int both = argv[op][2] == 0;
888        int temp;
889        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
890                         *endptr == 0))
891          {
892          timeitm = temp;
893          op++;
894          argc--;
895          }
896        else timeitm = LOOPREPEAT;
897        if (both) timeit = timeitm;
898        }
899      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
900          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
901            *endptr == 0))
902        {
903    #if defined(_WIN32) || defined(WIN32)
904        printf("PCRE: -S not supported on this OS\n");
905        exit(1);
906    #else
907        int rc;
908        struct rlimit rlim;
909        getrlimit(RLIMIT_STACK, &rlim);
910        rlim.rlim_cur = stack_size * 1024 * 1024;
911        rc = setrlimit(RLIMIT_STACK, &rlim);
912        if (rc != 0)
913          {
914        printf("PCRE: setrlimit() failed with error %d\n", rc);
915        exit(1);
916          }
917        op++;
918        argc--;
919    #endif
920        }
921    #if !defined NOPOSIX
922    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
923    #endif
924      else if (strcmp(argv[op], "-C") == 0)
925        {
926        int rc;
927        printf("PCRE version %s\n", pcre_version());
928        printf("Compiled with\n");
929        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
930        printf("  %sUTF-8 support\n", rc? "" : "No ");
931        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
932        printf("  %sUnicode properties support\n", rc? "" : "No ");
933        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
934        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
935          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
936          (rc == -2)? "ANYCRLF" :
937          (rc == -1)? "ANY" : "???");
938        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
939        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
940                                         "all Unicode newlines");
941        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
942        printf("  Internal link size = %d\n", rc);
943        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
944        printf("  POSIX malloc threshold = %d\n", rc);
945        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
946        printf("  Default match limit = %d\n", rc);
947        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
948        printf("  Default recursion depth limit = %d\n", rc);
949        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
950        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
951        goto EXIT;
952        }
953      else if (strcmp(argv[op], "-help") == 0 ||
954               strcmp(argv[op], "--help") == 0)
955        {
956        usage();
957        goto EXIT;
958        }
959    else    else
960      {      {
961      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
962      return 1;      usage();
963        yield = 1;
964        goto EXIT;
965      }      }
966    op++;    op++;
967    argc--;    argc--;
968    }    }
969    
970    /* Get the store for the offsets vector, and remember what it was */
971    
972    size_offsets_max = size_offsets;
973    offsets = (int *)malloc(size_offsets_max * sizeof(int));
974    if (offsets == NULL)
975      {
976      printf("** Failed to get %d bytes of memory for offsets vector\n",
977        (int)(size_offsets_max * sizeof(int)));
978      yield = 1;
979      goto EXIT;
980      }
981    
982  /* Sort out the input and output files */  /* Sort out the input and output files */
983    
984  if (argc > 1)  if (argc > 1)
985    {    {
986    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
987    if (infile == NULL)    if (infile == NULL)
988      {      {
989      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
990      return 1;      yield = 1;
991        goto EXIT;
992      }      }
993    }    }
994    
995  if (argc > 2)  if (argc > 2)
996    {    {
997    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
998    if (outfile == NULL)    if (outfile == NULL)
999      {      {
1000      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1001      return 1;      yield = 1;
1002        goto EXIT;
1003      }      }
1004    }    }
1005    
1006  /* Set alternative malloc function */  /* Set alternative malloc function */
1007    
1008  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1009    pcre_free = new_free;
1010    pcre_stack_malloc = stack_malloc;
1011    pcre_stack_free = stack_free;
1012    
1013  /* Heading line, then prompt for first re if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1014    
1015  fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
 fprintf(outfile, "PCRE version %s\n\n", pcre_version());  
1016    
1017  /* Main loop */  /* Main loop */
1018    
# Line 331  while (!done) Line 1020  while (!done)
1020    {    {
1021    pcre *re = NULL;    pcre *re = NULL;
1022    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
1023    
1024    #if !defined NOPOSIX  /* There are still compilers that require no indent */
1025    regex_t preg;    regex_t preg;
   const char *error;  
   unsigned char *p, *pp;  
   int do_study = 0;  
   int do_debug = 0;  
1026    int do_posix = 0;    int do_posix = 0;
1027    int erroroffset, len, delimiter;  #endif
1028    
1029    if (infile == stdin) printf("  re> ");    const char *error;
1030    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    unsigned char *p, *pp, *ppp;
1031    if (infile != stdin) fprintf(outfile, (char *)buffer);    unsigned char *to_file = NULL;
1032      const unsigned char *tables = NULL;
1033      unsigned long int true_size, true_study_size = 0;
1034      size_t size, regex_gotten_store;
1035      int do_study = 0;
1036      int do_debug = debug;
1037      int do_G = 0;
1038      int do_g = 0;
1039      int do_showinfo = showinfo;
1040      int do_showrest = 0;
1041      int do_flip = 0;
1042      int erroroffset, len, delimiter, poffset;
1043    
1044      use_utf8 = 0;
1045      debug_lengths = 1;
1046    
1047      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
1048      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1049      fflush(outfile);
1050    
1051    p = buffer;    p = buffer;
1052    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1053    if (*p == 0) continue;    if (*p == 0) continue;
1054    
1055    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1056    complete, read more. */  
1057      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1058        {
1059        unsigned long int magic, get_options;
1060        uschar sbuf[8];
1061        FILE *f;
1062    
1063        p++;
1064        pp = p + (int)strlen((char *)p);
1065        while (isspace(pp[-1])) pp--;
1066        *pp = 0;
1067    
1068        f = fopen((char *)p, "rb");
1069        if (f == NULL)
1070          {
1071          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1072          continue;
1073          }
1074    
1075        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1076    
1077        true_size =
1078          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1079        true_study_size =
1080          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1081    
1082        re = (real_pcre *)new_malloc(true_size);
1083        regex_gotten_store = gotten_store;
1084    
1085        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1086    
1087        magic = ((real_pcre *)re)->magic_number;
1088        if (magic != MAGIC_NUMBER)
1089          {
1090          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1091            {
1092            do_flip = 1;
1093            }
1094          else
1095            {
1096            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1097            fclose(f);
1098            continue;
1099            }
1100          }
1101    
1102        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1103          do_flip? " (byte-inverted)" : "", p);
1104    
1105        /* Need to know if UTF-8 for printing data strings */
1106    
1107        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1108        use_utf8 = (get_options & PCRE_UTF8) != 0;
1109    
1110        /* Now see if there is any following study data */
1111    
1112        if (true_study_size != 0)
1113          {
1114          pcre_study_data *psd;
1115    
1116          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1117          extra->flags = PCRE_EXTRA_STUDY_DATA;
1118    
1119          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1120          extra->study_data = psd;
1121    
1122          if (fread(psd, 1, true_study_size, f) != true_study_size)
1123            {
1124            FAIL_READ:
1125            fprintf(outfile, "Failed to read data from %s\n", p);
1126            if (extra != NULL) new_free(extra);
1127            if (re != NULL) new_free(re);
1128            fclose(f);
1129            continue;
1130            }
1131          fprintf(outfile, "Study data loaded from %s\n", p);
1132          do_study = 1;     /* To get the data output if requested */
1133          }
1134        else fprintf(outfile, "No study data\n");
1135    
1136        fclose(f);
1137        goto SHOW_INFO;
1138        }
1139    
1140      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1141      the pattern; if is isn't complete, read more. */
1142    
1143    delimiter = *p++;    delimiter = *p++;
1144    
1145    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
1146      {      {
1147      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1148      goto SKIP_DATA;      goto SKIP_DATA;
1149      }      }
1150    
1151    pp = p;    pp = p;
1152      poffset = p - buffer;
1153    
1154    for(;;)    for(;;)
1155      {      {
1156      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
     if (*pp != 0) break;  
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
1157        {        {
1158        fprintf(outfile, "** Expression too long - missing delimiter?\n");        if (*pp == '\\' && pp[1] != 0) pp++;
1159        goto SKIP_DATA;          else if (*pp == delimiter) break;
1160          pp++;
1161        }        }
1162        if (*pp != 0) break;
1163      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if (fgets((char *)pp, len, infile) == NULL)  
1164        {        {
1165        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1166        done = 1;        done = 1;
1167        goto CONTINUE;        goto CONTINUE;
1168        }        }
1169      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1170      }      }
1171    
1172    /* Terminate the pattern at the delimiter */    /* The buffer may have moved while being extended; reset the start of data
1173      pointer to the correct relative point in the buffer. */
1174    
1175      p = buffer + poffset;
1176    
1177      /* If the first character after the delimiter is backslash, make
1178      the pattern end with backslash. This is purely to provide a way
1179      of testing for the error message when a pattern ends with backslash. */
1180    
1181      if (pp[1] == '\\') *pp++ = '\\';
1182    
1183      /* Terminate the pattern at the delimiter, and save a copy of the pattern
1184      for callouts. */
1185    
1186    *pp++ = 0;    *pp++ = 0;
1187      strcpy((char *)pbuffer, (char *)p);
1188    
1189    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1190    
1191    options = 0;    options = 0;
1192    study_options = 0;    study_options = 0;
1193      log_store = showstore;  /* default from command line */
1194    
1195    while (*pp != 0)    while (*pp != 0)
1196      {      {
1197      switch (*pp++)      switch (*pp++)
1198        {        {
1199          case 'f': options |= PCRE_FIRSTLINE; break;
1200          case 'g': do_g = 1; break;
1201        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1202        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1203        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1204        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1205    
1206          case '+': do_showrest = 1; break;
1207        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1208        case 'D': do_debug = 1; break;        case 'B': do_debug = 1; break;
1209          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1210          case 'D': do_debug = do_showinfo = 1; break;
1211        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1212          case 'F': do_flip = 1; break;
1213          case 'G': do_G = 1; break;
1214          case 'I': do_showinfo = 1; break;
1215          case 'J': options |= PCRE_DUPNAMES; break;
1216          case 'M': log_store = 1; break;
1217          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1218    
1219    #if !defined NOPOSIX
1220        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1221    #endif
1222    
1223        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1224        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
1225        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1226        case '\n': case ' ': break;        case 'Z': debug_lengths = 0; break;
1227          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1228          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1229    
1230          case 'L':
1231          ppp = pp;
1232          /* The '\r' test here is so that it works on Windows. */
1233          /* The '0' test is just in case this is an unterminated line. */
1234          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1235          *ppp = 0;
1236          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1237            {
1238            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1239            goto SKIP_DATA;
1240            }
1241          locale_set = 1;
1242          tables = pcre_maketables();
1243          pp = ppp;
1244          break;
1245    
1246          case '>':
1247          to_file = pp;
1248          while (*pp != 0) pp++;
1249          while (isspace(pp[-1])) pp--;
1250          *pp = 0;
1251          break;
1252    
1253          case '<':
1254            {
1255            if (strncmp((char *)pp, "JS>", 3) == 0)
1256              {
1257              options |= PCRE_JAVASCRIPT_COMPAT;
1258              pp += 3;
1259              }
1260            else
1261              {
1262              int x = check_newline(pp, outfile);
1263              if (x == 0) goto SKIP_DATA;
1264              options |= x;
1265              while (*pp++ != '>');
1266              }
1267            }
1268          break;
1269    
1270          case '\r':                      /* So that it works in Windows */
1271          case '\n':
1272          case ' ':
1273          break;
1274    
1275        default:        default:
1276        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1277        goto SKIP_DATA;        goto SKIP_DATA;
# Line 413  while (!done) Line 1279  while (!done)
1279      }      }
1280    
1281    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1282    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
1283      local character tables. */
1284    
1285    #if !defined NOPOSIX
1286    if (posix || do_posix)    if (posix || do_posix)
1287      {      {
1288      int rc;      int rc;
1289      int cflags = 0;      int cflags = 0;
1290    
1291      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1292      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1293        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1294        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1295        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1296    
1297      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1298    
1299      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 428  while (!done) Line 1301  while (!done)
1301    
1302      if (rc != 0)      if (rc != 0)
1303        {        {
1304        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1305        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1306        goto SKIP_DATA;        goto SKIP_DATA;
1307        }        }
# Line 437  while (!done) Line 1310  while (!done)
1310    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1311    
1312    else    else
1313    #endif  /* !defined NOPOSIX */
1314    
1315      {      {
1316      if (timeit)      if (timeit > 0)
1317        {        {
1318        register int i;        register int i;
1319        clock_t time_taken;        clock_t time_taken;
1320        clock_t start_time = clock();        clock_t start_time = clock();
1321        for (i = 0; i < 4000; i++)        for (i = 0; i < timeit; i++)
1322          {          {
1323          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1324          if (re != NULL) free(re);          if (re != NULL) free(re);
1325          }          }
1326        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1327        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1328          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)timeit) /
1329              (double)CLOCKS_PER_SEC);
1330        }        }
1331    
1332      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1333    
1334      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1335      if non-interactive. */      if non-interactive. */
# Line 466  while (!done) Line 1342  while (!done)
1342          {          {
1343          for (;;)          for (;;)
1344            {            {
1345            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1346              {              {
1347              done = 1;              done = 1;
1348              goto CONTINUE;              goto CONTINUE;
# Line 477  while (!done) Line 1353  while (!done)
1353            }            }
1354          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1355          }          }
1356        continue;        goto CONTINUE;
1357          }
1358    
1359        /* Compilation succeeded; print data if required. There are now two
1360        info-returning functions. The old one has a limited interface and
1361        returns only limited data. Check that it agrees with the newer one. */
1362    
1363        if (log_store)
1364          fprintf(outfile, "Memory allocation (code space): %d\n",
1365            (int)(gotten_store -
1366                  sizeof(real_pcre) -
1367                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1368    
1369        /* Extract the size for possible writing before possibly flipping it,
1370        and remember the store that was got. */
1371    
1372        true_size = ((real_pcre *)re)->size;
1373        regex_gotten_store = gotten_store;
1374    
1375        /* If /S was present, study the regexp to generate additional info to
1376        help with the matching. */
1377    
1378        if (do_study)
1379          {
1380          if (timeit > 0)
1381            {
1382            register int i;
1383            clock_t time_taken;
1384            clock_t start_time = clock();
1385            for (i = 0; i < timeit; i++)
1386              extra = pcre_study(re, study_options, &error);
1387            time_taken = clock() - start_time;
1388            if (extra != NULL) free(extra);
1389            fprintf(outfile, "  Study time %.4f milliseconds\n",
1390              (((double)time_taken * 1000.0) / (double)timeit) /
1391                (double)CLOCKS_PER_SEC);
1392            }
1393          extra = pcre_study(re, study_options, &error);
1394          if (error != NULL)
1395            fprintf(outfile, "Failed to study: %s\n", error);
1396          else if (extra != NULL)
1397            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1398          }
1399    
1400        /* If the 'F' option was present, we flip the bytes of all the integer
1401        fields in the regex data block and the study block. This is to make it
1402        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1403        compiled on a different architecture. */
1404    
1405        if (do_flip)
1406          {
1407          real_pcre *rre = (real_pcre *)re;
1408          rre->magic_number =
1409            byteflip(rre->magic_number, sizeof(rre->magic_number));
1410          rre->size = byteflip(rre->size, sizeof(rre->size));
1411          rre->options = byteflip(rre->options, sizeof(rre->options));
1412          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1413          rre->top_bracket =
1414            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1415          rre->top_backref =
1416            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1417          rre->first_byte =
1418            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1419          rre->req_byte =
1420            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1421          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1422            sizeof(rre->name_table_offset));
1423          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1424            sizeof(rre->name_entry_size));
1425          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1426            sizeof(rre->name_count));
1427    
1428          if (extra != NULL)
1429            {
1430            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1431            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1432            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1433            }
1434        }        }
1435    
1436      /* Compilation succeeded; print data if required */      /* Extract information from the compiled data if required */
1437    
1438        SHOW_INFO:
1439    
1440        if (do_debug)
1441          {
1442          fprintf(outfile, "------------------------------------------------------------------\n");
1443          pcre_printint(re, outfile, debug_lengths);
1444          }
1445    
1446      if (showinfo || do_debug)      if (do_showinfo)
1447        {        {
1448        int first_char, count;        unsigned long int get_options, all_options;
1449    #if !defined NOINFOCHECK
1450          int old_first_char, old_options, old_count;
1451    #endif
1452          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1453            hascrorlf;
1454          int nameentrysize, namecount;
1455          const uschar *nametable;
1456    
1457          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1458          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1459          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1460          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1461          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1462          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1463          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1464          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1465          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1466          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1467          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1468          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1469    
1470    #if !defined NOINFOCHECK
1471          old_count = pcre_info(re, &old_options, &old_first_char);
1472          if (count < 0) fprintf(outfile,
1473            "Error %d from pcre_info()\n", count);
1474          else
1475            {
1476            if (old_count != count) fprintf(outfile,
1477              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1478                old_count);
1479    
1480            if (old_first_char != first_char) fprintf(outfile,
1481              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1482                first_char, old_first_char);
1483    
1484            if (old_options != (int)get_options) fprintf(outfile,
1485              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1486                get_options, old_options);
1487            }
1488    #endif
1489    
1490          if (size != regex_gotten_store) fprintf(outfile,
1491            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1492            (int)size, (int)regex_gotten_store);
1493    
1494          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1495          if (backrefmax > 0)
1496            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1497    
1498          if (namecount > 0)
1499            {
1500            fprintf(outfile, "Named capturing subpatterns:\n");
1501            while (namecount-- > 0)
1502              {
1503              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1504                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1505                GET2(nametable, 0));
1506              nametable += nameentrysize;
1507              }
1508            }
1509    
1510          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1511          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1512    
1513          all_options = ((real_pcre *)re)->options;
1514          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1515    
1516          if (get_options == 0) fprintf(outfile, "No options\n");
1517            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1518              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1519              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1520              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1521              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1522              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1523              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1524              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1525              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1526              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1527              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1528              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1529              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1530              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1531              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1532              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1533    
1534          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1535    
1536          switch (get_options & PCRE_NEWLINE_BITS)
1537            {
1538            case PCRE_NEWLINE_CR:
1539            fprintf(outfile, "Forced newline sequence: CR\n");
1540            break;
1541    
1542            case PCRE_NEWLINE_LF:
1543            fprintf(outfile, "Forced newline sequence: LF\n");
1544            break;
1545    
1546            case PCRE_NEWLINE_CRLF:
1547            fprintf(outfile, "Forced newline sequence: CRLF\n");
1548            break;
1549    
1550            case PCRE_NEWLINE_ANYCRLF:
1551            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1552            break;
1553    
1554            case PCRE_NEWLINE_ANY:
1555            fprintf(outfile, "Forced newline sequence: ANY\n");
1556            break;
1557    
1558            default:
1559            break;
1560            }
1561    
1562          if (first_char == -1)
1563            {
1564            fprintf(outfile, "First char at start or follows newline\n");
1565            }
1566          else if (first_char < 0)
1567            {
1568            fprintf(outfile, "No first char\n");
1569            }
1570          else
1571            {
1572            int ch = first_char & 255;
1573            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1574              "" : " (caseless)";
1575            if (PRINTHEX(ch))
1576              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1577            else
1578              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1579            }
1580    
1581          if (need_char < 0)
1582            {
1583            fprintf(outfile, "No need char\n");
1584            }
1585          else
1586            {
1587            int ch = need_char & 255;
1588            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1589              "" : " (caseless)";
1590            if (PRINTHEX(ch))
1591              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1592            else
1593              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1594            }
1595    
1596        if (debug || do_debug) print_internals(re);        /* Don't output study size; at present it is in any case a fixed
1597          value, but it varies, depending on the computer architecture, and
1598          so messes up the test suite. (And with the /F option, it might be
1599          flipped.) */
1600    
1601        count = pcre_info(re, &options, &first_char);        if (do_study)
       if (count < 0) fprintf(outfile,  
         "Error %d while reading info\n", count);  
       else  
1602          {          {
1603          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (extra == NULL)
1604          if (options == 0) fprintf(outfile, "No options\n");            fprintf(outfile, "Study returned NULL\n");
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
1605          else          else
1606            {            {
1607            if (isprint(first_char))            uschar *start_bits = NULL;
1608              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1609    
1610              if (start_bits == NULL)
1611                fprintf(outfile, "No starting byte set\n");
1612            else            else
1613              fprintf(outfile, "First char = %d\n", first_char);              {
1614                int i;
1615                int c = 24;
1616                fprintf(outfile, "Starting byte set: ");
1617                for (i = 0; i < 256; i++)
1618                  {
1619                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1620                    {
1621                    if (c > 75)
1622                      {
1623                      fprintf(outfile, "\n  ");
1624                      c = 2;
1625                      }
1626                    if (PRINTHEX(i) && i != ' ')
1627                      {
1628                      fprintf(outfile, "%c ", i);
1629                      c += 2;
1630                      }
1631                    else
1632                      {
1633                      fprintf(outfile, "\\x%02x ", i);
1634                      c += 5;
1635                      }
1636                    }
1637                  }
1638                fprintf(outfile, "\n");
1639                }
1640            }            }
1641          }          }
1642        }        }
1643    
1644      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1645      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1646        the study length, in big-endian order. */
1647    
1648      if (do_study)      if (to_file != NULL)
1649        {        {
1650        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1651          if (f == NULL)
1652          {          {
1653          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < 4000; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.2f milliseconds\n",  
           ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
1654          }          }
1655          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
1656          {          {
1657          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1658          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (uschar)((true_size >> 24) & 255);
1659            fprintf(outfile, "No starting character set\n");          sbuf[1] = (uschar)((true_size >> 16) & 255);
1660            sbuf[2] = (uschar)((true_size >>  8) & 255);
1661            sbuf[3] = (uschar)((true_size) & 255);
1662    
1663            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1664            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1665            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1666            sbuf[7] = (uschar)((true_study_size) & 255);
1667    
1668            if (fwrite(sbuf, 1, 8, f) < 8 ||
1669                fwrite(re, 1, true_size, f) < true_size)
1670              {
1671              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1672              }
1673          else          else
1674            {            {
1675            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1676            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1677              {              {
1678              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1679                    true_study_size)
1680                {                {
1681                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1682                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1683                }                }
1684                else fprintf(outfile, "Study data written to %s\n", to_file);
1685    
1686              }              }
           fprintf(outfile, "\n");  
1687            }            }
1688            fclose(f);
1689          }          }
1690    
1691          new_free(re);
1692          if (extra != NULL) new_free(extra);
1693          if (tables != NULL) new_free((void *)tables);
1694          continue;  /* With next regex */
1695        }        }
1696      }      }        /* End of non-POSIX compile */
1697    
1698    /* Read data lines and test them */    /* Read data lines and test them */
1699    
1700    for (;;)    for (;;)
1701      {      {
1702      unsigned char *q;      uschar *q;
1703        uschar *bptr;
1704        int *use_offsets = offsets;
1705        int use_size_offsets = size_offsets;
1706        int callout_data = 0;
1707        int callout_data_set = 0;
1708      int count, c;      int count, c;
1709      int offsets[30];      int copystrings = 0;
1710      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
1711        int getstrings = 0;
1712        int getlist = 0;
1713        int gmatched = 0;
1714        int start_offset = 0;
1715        int g_notempty = 0;
1716        int use_dfa = 0;
1717    
1718      options = 0;      options = 0;
1719    
1720      if (infile == stdin) printf("  data> ");      *copynames = 0;
1721      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1722    
1723        copynamesptr = copynames;
1724        getnamesptr = getnames;
1725    
1726        pcre_callout = callout;
1727        first_callout = 1;
1728        callout_extra = 0;
1729        callout_count = 0;
1730        callout_fail_count = 999999;
1731        callout_fail_id = -1;
1732        show_malloc = 0;
1733    
1734        if (extra != NULL) extra->flags &=
1735          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1736    
1737        len = 0;
1738        for (;;)
1739        {        {
1740        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1741        goto CONTINUE;          {
1742            if (len > 0) break;
1743            done = 1;
1744            goto CONTINUE;
1745            }
1746          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1747          len = (int)strlen((char *)buffer);
1748          if (buffer[len-1] == '\n') break;
1749        }        }
     if (infile != stdin) fprintf(outfile, (char *)buffer);  
1750    
     len = (int)strlen((char *)buffer);  
1751      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1752      buffer[len] = 0;      buffer[len] = 0;
1753      if (len == 0) break;      if (len == 0) break;
# Line 612  while (!done) Line 1755  while (!done)
1755      p = buffer;      p = buffer;
1756      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1757    
1758      q = dbuffer;      bptr = q = dbuffer;
1759      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1760        {        {
1761        int i = 0;        int i = 0;
1762        int n = 0;        int n = 0;
1763    
1764        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1765          {          {
1766          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 633  while (!done) Line 1777  while (!done)
1777          c -= '0';          c -= '0';
1778          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1779            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1780    
1781    #if !defined NOUTF8
1782            if (use_utf8 && c > 255)
1783              {
1784              unsigned char buff8[8];
1785              int ii, utn;
1786              utn = ord2utf8(c, buff8);
1787              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1788              c = buff8[ii];   /* Last byte */
1789              }
1790    #endif
1791          break;          break;
1792    
1793          case 'x':          case 'x':
1794    
1795            /* Handle \x{..} specially - new Perl thing for utf8 */
1796    
1797    #if !defined NOUTF8
1798            if (*p == '{')
1799              {
1800              unsigned char *pt = p;
1801              c = 0;
1802              while (isxdigit(*(++pt)))
1803                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1804              if (*pt == '}')
1805                {
1806                unsigned char buff8[8];
1807                int ii, utn;
1808                utn = ord2utf8(c, buff8);
1809                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1810                c = buff8[ii];   /* Last byte */
1811                p = pt + 1;
1812                break;
1813                }
1814              /* Not correct form; fall through */
1815              }
1816    #endif
1817    
1818            /* Ordinary \x */
1819    
1820          c = 0;          c = 0;
1821          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1822            {            {
# Line 644  while (!done) Line 1825  while (!done)
1825            }            }
1826          break;          break;
1827    
1828          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1829          p--;          p--;
1830          continue;          continue;
1831    
1832            case '>':
1833            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1834            continue;
1835    
1836          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1837          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1838          continue;          continue;
# Line 656  while (!done) Line 1841  while (!done)
1841          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1842          continue;          continue;
1843    
1844          case 'E':          case 'C':
1845          options |= PCRE_DOLLAR_ENDONLY;          if (isdigit(*p))    /* Set copy string */
1846              {
1847              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1848              copystrings |= 1 << n;
1849              }
1850            else if (isalnum(*p))
1851              {
1852              uschar *npp = copynamesptr;
1853              while (isalnum(*p)) *npp++ = *p++;
1854              *npp++ = 0;
1855              *npp = 0;
1856              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1857              if (n < 0)
1858                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1859              copynamesptr = npp;
1860              }
1861            else if (*p == '+')
1862              {
1863              callout_extra = 1;
1864              p++;
1865              }
1866            else if (*p == '-')
1867              {
1868              pcre_callout = NULL;
1869              p++;
1870              }
1871            else if (*p == '!')
1872              {
1873              callout_fail_id = 0;
1874              p++;
1875              while(isdigit(*p))
1876                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1877              callout_fail_count = 0;
1878              if (*p == '!')
1879                {
1880                p++;
1881                while(isdigit(*p))
1882                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1883                }
1884              }
1885            else if (*p == '*')
1886              {
1887              int sign = 1;
1888              callout_data = 0;
1889              if (*(++p) == '-') { sign = -1; p++; }
1890              while(isdigit(*p))
1891                callout_data = callout_data * 10 + *p++ - '0';
1892              callout_data *= sign;
1893              callout_data_set = 1;
1894              }
1895            continue;
1896    
1897    #if !defined NODFA
1898            case 'D':
1899    #if !defined NOPOSIX
1900            if (posix || do_posix)
1901              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1902            else
1903    #endif
1904              use_dfa = 1;
1905            continue;
1906    
1907            case 'F':
1908            options |= PCRE_DFA_SHORTEST;
1909            continue;
1910    #endif
1911    
1912            case 'G':
1913            if (isdigit(*p))
1914              {
1915              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1916              getstrings |= 1 << n;
1917              }
1918            else if (isalnum(*p))
1919              {
1920              uschar *npp = getnamesptr;
1921              while (isalnum(*p)) *npp++ = *p++;
1922              *npp++ = 0;
1923              *npp = 0;
1924              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1925              if (n < 0)
1926                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1927              getnamesptr = npp;
1928              }
1929          continue;          continue;
1930    
1931          case 'I':          case 'L':
1932          options |= PCRE_CASELESS;          getlist = 1;
1933          continue;          continue;
1934    
1935          case 'M':          case 'M':
1936          options |= PCRE_MULTILINE;          find_match_limit = 1;
1937          continue;          continue;
1938    
1939          case 'S':          case 'N':
1940          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1941          continue;          continue;
1942    
1943          case 'O':          case 'O':
1944          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1945          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1946              {
1947              size_offsets_max = n;
1948              free(offsets);
1949              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1950              if (offsets == NULL)
1951                {
1952                printf("** Failed to get %d bytes of memory for offsets vector\n",
1953                  (int)(size_offsets_max * sizeof(int)));
1954                yield = 1;
1955                goto EXIT;
1956                }
1957              }
1958            use_size_offsets = n;
1959            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1960            continue;
1961    
1962            case 'P':
1963            options |= PCRE_PARTIAL;
1964            continue;
1965    
1966            case 'Q':
1967            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1968            if (extra == NULL)
1969              {
1970              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1971              extra->flags = 0;
1972              }
1973            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1974            extra->match_limit_recursion = n;
1975            continue;
1976    
1977            case 'q':
1978            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1979            if (extra == NULL)
1980              {
1981              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1982              extra->flags = 0;
1983              }
1984            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1985            extra->match_limit = n;
1986            continue;
1987    
1988    #if !defined NODFA
1989            case 'R':
1990            options |= PCRE_DFA_RESTART;
1991            continue;
1992    #endif
1993    
1994            case 'S':
1995            show_malloc = 1;
1996          continue;          continue;
1997    
1998          case 'Z':          case 'Z':
1999          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2000          continue;          continue;
2001    
2002            case '?':
2003            options |= PCRE_NO_UTF8_CHECK;
2004            continue;
2005    
2006            case '<':
2007              {
2008              int x = check_newline(p, outfile);
2009              if (x == 0) goto NEXT_DATA;
2010              options |= x;
2011              while (*p++ != '>');
2012              }
2013            continue;
2014          }          }
2015        *q++ = c;        *q++ = c;
2016        }        }
2017      *q = 0;      *q = 0;
2018      len = q - dbuffer;      len = q - dbuffer;
2019    
2020        if ((all_use_dfa || use_dfa) && find_match_limit)
2021          {
2022          printf("**Match limit not relevant for DFA matching: ignored\n");
2023          find_match_limit = 0;
2024          }
2025    
2026      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2027      support timing. */      support timing or playing with the match limit or callout data. */
2028    
2029    #if !defined NOPOSIX
2030      if (posix || do_posix)      if (posix || do_posix)
2031        {        {
2032        int rc;        int rc;
2033        int eflags = 0;        int eflags = 0;
2034        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
2035          if (use_size_offsets > 0)
2036            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2037        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2038        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2039    
2040        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
2041    
2042        if (rc != 0)        if (rc != 0)
2043          {          {
2044          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2045          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2046          }          }
2047          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2048                  != 0)
2049            {
2050            fprintf(outfile, "Matched with REG_NOSUB\n");
2051            }
2052        else        else
2053          {          {
2054          size_t i;          size_t i;
2055          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2056            {            {
2057            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2058              {              {
2059              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
2060              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2061                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2062              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2063                if (i == 0 && do_showrest)
2064                  {
2065                  fprintf(outfile, " 0+ ");
2066                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2067                    outfile);
2068                  fprintf(outfile, "\n");
2069                  }
2070              }              }
2071            }            }
2072          }          }
2073          free(pmatch);
2074        }        }
2075    
2076      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
2077    
2078      else      else
2079    #endif  /* !defined NOPOSIX */
2080    
2081        for (;; gmatched++)    /* Loop for /g or /G */
2082        {        {
2083        if (timeit)        if (timeitm > 0)
2084          {          {
2085          register int i;          register int i;
2086          clock_t time_taken;          clock_t time_taken;
2087          clock_t start_time = clock();          clock_t start_time = clock();
2088          for (i = 0; i < 4000; i++)  
2089            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
2090              size_offsets);          if (all_use_dfa || use_dfa)
2091              {
2092              int workspace[1000];
2093              for (i = 0; i < timeitm; i++)
2094                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2095                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2096                  sizeof(workspace)/sizeof(int));
2097              }
2098            else
2099    #endif
2100    
2101            for (i = 0; i < timeitm; i++)
2102              count = pcre_exec(re, extra, (char *)bptr, len,
2103                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2104    
2105          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2106          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2107            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeitm) /
2108                (double)CLOCKS_PER_SEC);
2109            }
2110    
2111          /* If find_match_limit is set, we want to do repeated matches with
2112          varying limits in order to find the minimum value for the match limit and
2113          for the recursion limit. */
2114    
2115          if (find_match_limit)
2116            {
2117            if (extra == NULL)
2118              {
2119              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2120              extra->flags = 0;
2121              }
2122    
2123            (void)check_match_limit(re, extra, bptr, len, start_offset,
2124              options|g_notempty, use_offsets, use_size_offsets,
2125              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2126              PCRE_ERROR_MATCHLIMIT, "match()");
2127    
2128            count = check_match_limit(re, extra, bptr, len, start_offset,
2129              options|g_notempty, use_offsets, use_size_offsets,
2130              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2131              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2132            }
2133    
2134          /* If callout_data is set, use the interface with additional data */
2135    
2136          else if (callout_data_set)
2137            {
2138            if (extra == NULL)
2139              {
2140              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2141              extra->flags = 0;
2142              }
2143            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2144            extra->callout_data = &callout_data;
2145            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2146              options | g_notempty, use_offsets, use_size_offsets);
2147            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2148          }          }
2149    
2150        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
2151          size_offsets);        value of match_limit. */
2152    
2153    #if !defined NODFA
2154          else if (all_use_dfa || use_dfa)
2155            {
2156            int workspace[1000];
2157            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2158              options | g_notempty, use_offsets, use_size_offsets, workspace,
2159              sizeof(workspace)/sizeof(int));
2160            if (count == 0)
2161              {
2162              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2163              count = use_size_offsets/2;
2164              }
2165            }
2166    #endif
2167    
2168        if (count == 0)        else
2169          {          {
2170          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2171          count = size_offsets/2;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2172            if (count == 0)
2173              {
2174              fprintf(outfile, "Matched, but too many substrings\n");
2175              count = use_size_offsets/3;
2176              }
2177          }          }
2178    
2179          /* Matched */
2180    
2181        if (count >= 0)        if (count >= 0)
2182          {          {
2183          int i;          int i, maxcount;
2184          count *= 2;  
2185          for (i = 0; i < count; i += 2)  #if !defined NODFA
2186            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2187    #endif
2188              maxcount = use_size_offsets/3;
2189    
2190            /* This is a check against a lunatic return value. */
2191    
2192            if (count > maxcount)
2193              {
2194              fprintf(outfile,
2195                "** PCRE error: returned count %d is too big for offset size %d\n",
2196                count, use_size_offsets);
2197              count = use_size_offsets/3;
2198              if (do_g || do_G)
2199                {
2200                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2201                do_g = do_G = FALSE;        /* Break g/G loop */
2202                }
2203              }
2204    
2205            for (i = 0; i < count * 2; i += 2)
2206            {            {
2207            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2208              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2209            else            else
2210              {              {
2211              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2212              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2213                  use_offsets[i+1] - use_offsets[i], outfile);
2214              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2215                if (i == 0)
2216                  {
2217                  if (do_showrest)
2218                    {
2219                    fprintf(outfile, " 0+ ");
2220                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2221                      outfile);
2222                    fprintf(outfile, "\n");
2223                    }
2224                  }
2225                }
2226              }
2227    
2228            for (i = 0; i < 32; i++)
2229              {
2230              if ((copystrings & (1 << i)) != 0)
2231                {
2232                char copybuffer[256];
2233                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2234                  i, copybuffer, sizeof(copybuffer));
2235                if (rc < 0)
2236                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2237                else
2238                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2239                }
2240              }
2241    
2242            for (copynamesptr = copynames;
2243                 *copynamesptr != 0;
2244                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2245              {
2246              char copybuffer[256];
2247              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2248                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2249              if (rc < 0)
2250                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2251              else
2252                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2253              }
2254    
2255            for (i = 0; i < 32; i++)
2256              {
2257              if ((getstrings & (1 << i)) != 0)
2258                {
2259                const char *substring;
2260                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2261                  i, &substring);
2262                if (rc < 0)
2263                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2264                else
2265                  {
2266                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2267                  pcre_free_substring(substring);
2268                  }
2269                }
2270              }
2271    
2272            for (getnamesptr = getnames;
2273                 *getnamesptr != 0;
2274                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2275              {
2276              const char *substring;
2277              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2278                count, (char *)getnamesptr, &substring);
2279              if (rc < 0)
2280                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2281              else
2282                {
2283                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2284                pcre_free_substring(substring);
2285              }              }
2286            }            }
2287    
2288            if (getlist)
2289              {
2290              const char **stringlist;
2291              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2292                &stringlist);
2293              if (rc < 0)
2294                fprintf(outfile, "get substring list failed %d\n", rc);
2295              else
2296                {
2297                for (i = 0; i < count; i++)
2298                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2299                if (stringlist[i] != NULL)
2300                  fprintf(outfile, "string list not terminated by NULL\n");
2301                /* free((void *)stringlist); */
2302                pcre_free_substring_list(stringlist);
2303                }
2304              }
2305            }
2306    
2307          /* There was a partial match */
2308    
2309          else if (count == PCRE_ERROR_PARTIAL)
2310            {
2311            fprintf(outfile, "Partial match");
2312    #if !defined NODFA
2313            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2314              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2315                bptr + use_offsets[0]);
2316    #endif
2317            fprintf(outfile, "\n");
2318            break;  /* Out of the /g loop */
2319          }          }
2320    
2321          /* Failed to match. If this is a /g or /G loop and we previously set
2322          g_notempty after a null match, this is not necessarily the end. We want
2323          to advance the start offset, and continue. We won't be at the end of the
2324          string - that was checked before setting g_notempty.
2325    
2326          Complication arises in the case when the newline option is "any" or
2327          "anycrlf". If the previous match was at the end of a line terminated by
2328          CRLF, an advance of one character just passes the \r, whereas we should
2329          prefer the longer newline sequence, as does the code in pcre_exec().
2330          Fudge the offset value to achieve this.
2331    
2332          Otherwise, in the case of UTF-8 matching, the advance must be one
2333          character, not one byte. */
2334    
2335        else        else
2336          {          {
2337          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2338              {
2339              int onechar = 1;
2340              unsigned int obits = ((real_pcre *)re)->options;
2341              use_offsets[0] = start_offset;
2342              if ((obits & PCRE_NEWLINE_BITS) == 0)
2343                {
2344                int d;
2345                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2346                obits = (d == '\r')? PCRE_NEWLINE_CR :
2347                        (d == '\n')? PCRE_NEWLINE_LF :
2348                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2349                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2350                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2351                }
2352              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2353                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2354                  &&
2355                  start_offset < len - 1 &&
2356                  bptr[start_offset] == '\r' &&
2357                  bptr[start_offset+1] == '\n')
2358                onechar++;
2359              else if (use_utf8)
2360                {
2361                while (start_offset + onechar < len)
2362                  {
2363                  int tb = bptr[start_offset+onechar];
2364                  if (tb <= 127) break;
2365                  tb &= 0xc0;
2366                  if (tb != 0 && tb != 0xc0) onechar++;
2367                  }
2368                }
2369              use_offsets[1] = start_offset + onechar;
2370              }
2371            else
2372              {
2373              if (count == PCRE_ERROR_NOMATCH)
2374                {
2375                if (gmatched == 0) fprintf(outfile, "No match\n");
2376                }
2377            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2378              break;  /* Out of the /g loop */
2379              }
2380          }          }
2381        }  
2382      }        /* If not /g or /G we are done */
2383    
2384          if (!do_g && !do_G) break;
2385    
2386          /* If we have matched an empty string, first check to see if we are at
2387          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2388          what Perl's /g option does. This turns out to be rather cunning. First
2389          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2390          same point. If this fails (picked up above) we advance to the next
2391          character. */
2392    
2393          g_notempty = 0;
2394    
2395          if (use_offsets[0] == use_offsets[1])
2396            {
2397            if (use_offsets[0] == len) break;
2398            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2399            }
2400    
2401          /* For /g, update the start offset, leaving the rest alone */
2402    
2403          if (do_g) start_offset = use_offsets[1];
2404    
2405          /* For /G, update the pointer and length */
2406    
2407          else
2408            {
2409            bptr += use_offsets[1];
2410            len -= use_offsets[1];
2411            }
2412          }  /* End of loop for /g and /G */
2413    
2414        NEXT_DATA: continue;
2415        }    /* End of loop for data lines */
2416    
2417    CONTINUE:    CONTINUE:
2418    
2419    #if !defined NOPOSIX
2420    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2421    if (re != NULL) free(re);  #endif
2422    if (extra != NULL) free(extra);  
2423      if (re != NULL) new_free(re);
2424      if (extra != NULL) new_free(extra);
2425      if (tables != NULL)
2426        {
2427        new_free((void *)tables);
2428        setlocale(LC_CTYPE, "C");
2429        locale_set = 0;
2430        }
2431    }    }
2432    
2433  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2434  return 0;  
2435    EXIT:
2436    
2437    if (infile != NULL && infile != stdin) fclose(infile);
2438    if (outfile != NULL && outfile != stdout) fclose(outfile);
2439    
2440    free(buffer);
2441    free(dbuffer);
2442    free(pbuffer);
2443    free(offsets);
2444    
2445    return yield;
2446  }  }
2447    
2448  /* End */  /* End of pcretest.c */

Legend:
Removed from v.13  
changed lines
  Added in v.345

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12