/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory | Revision Log | View Patch

revision 19 by nigel, Sat Feb 24 21:38:33 2007 UTC revision 391 by ph10, Tue Mar 17 21:16:01 2009 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48    #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60    /* A number of things vary for Windows builds. Originally, pcretest opened its
61    input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77    #else
78    #include <sys/time.h>          /* These two includes are needed */
79    #include <sys/resource.h>      /* for setrlimit(). */
80    #define INPUT_MODE   "rb"
81    #define OUTPUT_MODE  "wb"
82    #endif
83    
84    
85  /* Use the internal info for displaying the results of pcre_study(). */  /* We have to include pcre_internal.h because we need the internal info for
86    displaying the results of pcre_study() and we also need to know about the
87    internal macros, structures, and other internal data values; pcretest has
88    "inside information" compared to a program that strictly follows the PCRE API.
89    
90    Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95    #include "pcre_internal.h"
96    
97    /* We need access to some of the data tables that PCRE uses. So as not to have
98    to keep two copies, we include the source file here, changing the names of the
99    external symbols to prevent clashes. */
100    
101    #define _pcre_ucp_gentype      ucp_gentype
102    #define _pcre_utf8_table1      utf8_table1
103    #define _pcre_utf8_table1_size utf8_table1_size
104    #define _pcre_utf8_table2      utf8_table2
105    #define _pcre_utf8_table3      utf8_table3
106    #define _pcre_utf8_table4      utf8_table4
107    #define _pcre_utt              utt
108    #define _pcre_utt_size         utt_size
109    #define _pcre_utt_names        utt_names
110    #define _pcre_OP_lengths       OP_lengths
111    
112    #include "pcre_tables.c"
113    
114    /* We also need the pcre_printint() function for printing out compiled
115    patterns. This function is in a separate file so that it can be included in
116    pcre_compile.c when that module is compiled with debugging enabled.
117    
118    The definition of the macro PRINTABLE, which determines whether to print an
119    output character as-is or as a hex value when showing compiled patterns, is
120    contained in this file. We use it here also, in cases when the locale has not
121    been explicitly changed, so as to get consistent output from systems that
122    differ in their output from isprint() even in the "C" locale. */
123    
124    #include "pcre_printint.src"
125    
126    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127    
128    
129    /* It is possible to compile this test program without including support for
130    testing the POSIX interface, though this is not available via the standard
131    Makefile. */
132    
133  #include "internal.h"  #if !defined NOPOSIX
134  #include "pcreposix.h"  #include "pcreposix.h"
135    #endif
136    
137    /* It is also possible, for the benefit of the version currently imported into
138    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139    interface to the DFA matcher (NODFA), and without the doublecheck of the old
140    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141    UTF8 support if PCRE is built without it. */
142    
143    #ifndef SUPPORT_UTF8
144    #ifndef NOUTF8
145    #define NOUTF8
146    #endif
147    #endif
148    
149    
150    /* Other parameters */
151    
152  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
153  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 157 
157  #endif  #endif
158  #endif  #endif
159    
160    /* This is the default loop count for timing. */
161    
162    #define LOOPREPEAT 500000
163    
164    /* Static variables */
165    
166  static FILE *outfile;  static FILE *outfile;
167  static int log_store = 0;  static int log_store = 0;
168    static int callout_count;
169    static int callout_extra;
170    static int callout_fail_count;
171    static int callout_fail_id;
172    static int debug_lengths;
173    static int first_callout;
174    static int locale_set = 0;
175    static int show_malloc;
176    static int use_utf8;
177    static size_t gotten_store;
178    
179    /* The buffers grow automatically if very long input lines are encountered. */
180    
181    static int buffer_size = 50000;
182    static uschar *buffer = NULL;
183    static uschar *dbuffer = NULL;
184    static uschar *pbuffer = NULL;
185    
186    
187    
188  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
189  code as contained in pcre.c under the DEBUG macro. */  *        Read or extend an input line            *
190    *************************************************/
191    
192  static const char *OP_names[] = {  /* Input lines are read into buffer, but both patterns and data lines can be
193    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  continued over multiple input lines. In addition, if the buffer fills up, we
194    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  want to automatically expand it so as to be able to handle extremely large
195    "not",  lines that are needed for certain stress tests. When the input buffer is
196    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  expanded, the other two buffers must also be expanded likewise, and the
197    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  contents of pbuffer, which are a copy of the input for callouts, must be
198    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  preserved (for when expansion happens for a data line). This is not the most
199    "*", "*?", "+", "+?", "?", "??", "{", "{",  optimal way of handling this, but hey, this is just a test program!
200    "class", "negclass", "Ref",  
201    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  Arguments:
202    "Brazero", "Braminzero", "Bra"    f            the file to read
203  };    start        where in buffer to start (this *must* be within buffer)
204      prompt       for stdin or readline()
205    
206  static void print_internals(pcre *re)  Returns:       pointer to the start of new data
207  {                 could be a copy of start, or could be moved
208  unsigned char *code = ((real_pcre *)re)->code;                 NULL if no data read and EOF reached
209    */
 printf("------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   printf("%3d ", code - ((real_pcre *)re)->code);  
   
   if (*code >= OP_BRA)  
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     printf("    %s\n", OP_names[*code]);  
     printf("------------------------------------------------------------------\n");  
     return;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf("0,");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     printf("    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
     case OP_NEGCLASS:  
       {  
       int i, min, max;  
       if (*code++ == OP_CLASS) printf("    [");  
         else printf("   ^[");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
210    
211          case OP_CRRANGE:  static uschar *
212          case OP_CRMINRANGE:  extend_inputline(FILE *f, uschar *start, const char *prompt)
213          min = (code[1] << 8) + code[2];  {
214          max = (code[3] << 8) + code[4];  uschar *here = start;
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
215    
216          default:  for (;;)
217          code--;    {
218          }    int rlen = buffer_size - (here - buffer);
219    
220      if (rlen > 1000)
221        {
222        int dlen;
223    
224        /* If libreadline support is required, use readline() to read a line if the
225        input is a terminal. Note that readline() removes the trailing newline, so
226        we must put it back again, to be compatible with fgets(). */
227    
228    #ifdef SUPPORT_LIBREADLINE
229        if (isatty(fileno(f)))
230          {
231          size_t len;
232          char *s = readline(prompt);
233          if (s == NULL) return (here == start)? NULL : start;
234          len = strlen(s);
235          if (len > 0) add_history(s);
236          if (len > rlen - 1) len = rlen - 1;
237          memcpy(here, s, len);
238          here[len] = '\n';
239          here[len+1] = 0;
240          free(s);
241        }        }
242      break;      else
243    #endif
244    
245        /* Read the next line by normal means, prompting if the file is stdin. */
246    
247      /* Anything else is just a one-node item */        {
248          if (f == stdin) printf(prompt);
249          if (fgets((char *)here, rlen,  f) == NULL)
250            return (here == start)? NULL : start;
251          }
252    
253      default:      dlen = (int)strlen((char *)here);
254      printf("    %s", OP_names[*code]);      if (dlen > 0 && here[dlen - 1] == '\n') return start;
255      break;      here += dlen;
256      }      }
257    
258    code++;    else
259    printf("\n");      {
260        int new_buffer_size = 2*buffer_size;
261        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266          {
267          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268          exit(1);
269          }
270    
271        memcpy(new_buffer, buffer, buffer_size);
272        memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274        buffer_size = new_buffer_size;
275    
276        start = new_buffer + (start - buffer);
277        here = new_buffer + (here - buffer);
278    
279        free(buffer);
280        free(dbuffer);
281        free(pbuffer);
282    
283        buffer = new_buffer;
284        dbuffer = new_dbuffer;
285        pbuffer = new_pbuffer;
286        }
287    }    }
288    
289    return NULL;  /* Control never gets here */
290  }  }
291    
292    
293    
 /* Character string printing function. */  
294    
295  static void pchars(unsigned char *p, int length)  
296    
297    
298    /*************************************************
299    *          Read number from string               *
300    *************************************************/
301    
302    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303    around with conditional compilation, just do the job by hand. It is only used
304    for unpicking arguments, so just keep it simple.
305    
306    Arguments:
307      str           string to be converted
308      endptr        where to put the end pointer
309    
310    Returns:        the value, as an int
311    */
312    
313    static int
314    get_value(unsigned char *str, unsigned char **endptr)
315    {
316    int result = 0;
317    while(*str != 0 && isspace(*str)) str++;
318    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
319    *endptr = str;
320    return(result);
321    }
322    
323    
324    
325    
326    /*************************************************
327    *            Convert UTF-8 string to value       *
328    *************************************************/
329    
330    /* This function takes one or more bytes that represent a UTF-8 character,
331    and returns the value of the character.
332    
333    Argument:
334      utf8bytes   a pointer to the byte vector
335      vptr        a pointer to an int to receive the value
336    
337    Returns:      >  0 => the number of bytes consumed
338                  -6 to 0 => malformed UTF-8 character at offset = (-return)
339    */
340    
341    #if !defined NOUTF8
342    
343    static int
344    utf82ord(unsigned char *utf8bytes, int *vptr)
345  {  {
346  int c;  int c = *utf8bytes++;
347    int d = c;
348    int i, j, s;
349    
350    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
351      {
352      if ((d & 0x80) == 0) break;
353      d <<= 1;
354      }
355    
356    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
357    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
358    
359    /* i now has a value in the range 1-5 */
360    
361    s = 6*i;
362    d = (c & utf8_table3[i]) << s;
363    
364    for (j = 0; j < i; j++)
365      {
366      c = *utf8bytes++;
367      if ((c & 0xc0) != 0x80) return -(j+1);
368      s -= 6;
369      d |= (c & 0x3f) << s;
370      }
371    
372    /* Check that encoding was the correct unique one */
373    
374    for (j = 0; j < utf8_table1_size; j++)
375      if (d <= utf8_table1[j]) break;
376    if (j != i) return -(i+1);
377    
378    /* Valid value */
379    
380    *vptr = d;
381    return i+1;
382    }
383    
384    #endif
385    
386    
387    
388    /*************************************************
389    *       Convert character value to UTF-8         *
390    *************************************************/
391    
392    /* This function takes an integer value in the range 0 - 0x7fffffff
393    and encodes it as a UTF-8 character in 1 to 6 bytes.
394    
395    Arguments:
396      cvalue     the character value
397      utf8bytes  pointer to buffer for result - at least 6 bytes long
398    
399    Returns:     number of bytes placed in the buffer
400    */
401    
402    #if !defined NOUTF8
403    
404    static int
405    ord2utf8(int cvalue, uschar *utf8bytes)
406    {
407    register int i, j;
408    for (i = 0; i < utf8_table1_size; i++)
409      if (cvalue <= utf8_table1[i]) break;
410    utf8bytes += i;
411    for (j = i; j > 0; j--)
412     {
413     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414     cvalue >>= 6;
415     }
416    *utf8bytes = utf8_table2[i] | cvalue;
417    return i + 1;
418    }
419    
420    #endif
421    
422    
423    
424    /*************************************************
425    *             Print character string             *
426    *************************************************/
427    
428    /* Character string printing function. Must handle UTF-8 strings in utf8
429    mode. Yields number of characters printed. If handed a NULL file, just counts
430    chars without printing. */
431    
432    static int pchars(unsigned char *p, int length, FILE *f)
433    {
434    int c = 0;
435    int yield = 0;
436    
437  while (length-- > 0)  while (length-- > 0)
438    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
439      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
440      if (use_utf8)
441        {
442        int rc = utf82ord(p, &c);
443    
444        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
445          {
446          length -= rc - 1;
447          p += rc;
448          if (PRINTHEX(c))
449            {
450            if (f != NULL) fprintf(f, "%c", c);
451            yield++;
452            }
453          else
454            {
455            int n = 4;
456            if (f != NULL) fprintf(f, "\\x{%02x}", c);
457            yield += (n <= 0x000000ff)? 2 :
458                     (n <= 0x00000fff)? 3 :
459                     (n <= 0x0000ffff)? 4 :
460                     (n <= 0x000fffff)? 5 : 6;
461            }
462          continue;
463          }
464        }
465    #endif
466    
467       /* Not UTF-8, or malformed UTF-8  */
468    
469      c = *p++;
470      if (PRINTHEX(c))
471        {
472        if (f != NULL) fprintf(f, "%c", c);
473        yield++;
474        }
475      else
476        {
477        if (f != NULL) fprintf(f, "\\x%02x", c);
478        yield += 4;
479        }
480      }
481    
482    return yield;
483  }  }
484    
485    
486    
487    /*************************************************
488    *              Callout function                  *
489    *************************************************/
490    
491    /* Called from PCRE as a result of the (?C) item. We print out where we are in
492    the match. Yield zero unless more callouts than the fail count, or the callout
493    data is not zero. */
494    
495    static int callout(pcre_callout_block *cb)
496    {
497    FILE *f = (first_callout | callout_extra)? outfile : NULL;
498    int i, pre_start, post_start, subject_length;
499    
500    if (callout_extra)
501      {
502      fprintf(f, "Callout %d: last capture = %d\n",
503        cb->callout_number, cb->capture_last);
504    
505      for (i = 0; i < cb->capture_top * 2; i += 2)
506        {
507        if (cb->offset_vector[i] < 0)
508          fprintf(f, "%2d: <unset>\n", i/2);
509        else
510          {
511          fprintf(f, "%2d: ", i/2);
512          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
513            cb->offset_vector[i+1] - cb->offset_vector[i], f);
514          fprintf(f, "\n");
515          }
516        }
517      }
518    
519    /* Re-print the subject in canonical form, the first time or if giving full
520    details. On subsequent calls in the same match, we use pchars just to find the
521    printed lengths of the substrings. */
522    
523    if (f != NULL) fprintf(f, "--->");
524    
525    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
526    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527      cb->current_position - cb->start_match, f);
528    
529    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530    
531    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532      cb->subject_length - cb->current_position, f);
533    
534    if (f != NULL) fprintf(f, "\n");
535    
536    /* Always print appropriate indicators, with callout number if not already
537    shown. For automatic callouts, show the pattern offset. */
538    
539    if (cb->callout_number == 255)
540      {
541      fprintf(outfile, "%+3d ", cb->pattern_position);
542      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
543      }
544    else
545      {
546      if (callout_extra) fprintf(outfile, "    ");
547        else fprintf(outfile, "%3d ", cb->callout_number);
548      }
549    
550    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551    fprintf(outfile, "^");
552    
553    if (post_start > 0)
554      {
555      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
556      fprintf(outfile, "^");
557      }
558    
559    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560      fprintf(outfile, " ");
561    
562    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563      pbuffer + cb->pattern_position);
564    
565    fprintf(outfile, "\n");
566    first_callout = 0;
567    
568    if (cb->callout_data != NULL)
569      {
570      int callout_data = *((int *)(cb->callout_data));
571      if (callout_data != 0)
572        {
573        fprintf(outfile, "Callout data = %d\n", callout_data);
574        return callout_data;
575        }
576      }
577    
578    return (cb->callout_number != callout_fail_id)? 0 :
579           (++callout_count >= callout_fail_count)? 1 : 0;
580    }
581    
582    
583    /*************************************************
584    *            Local malloc functions              *
585    *************************************************/
586    
587  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
588  compiled re. */  compiled re. */
589    
590  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
591  {  {
592  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
593  return malloc(size);  gotten_store = size;
594    if (show_malloc)
595      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
596    return block;
597    }
598    
599    static void new_free(void *block)
600    {
601    if (show_malloc)
602      fprintf(outfile, "free             %p\n", block);
603    free(block);
604    }
605    
606    
607    /* For recursion malloc/free, to test stacking calls */
608    
609    static void *stack_malloc(size_t size)
610    {
611    void *block = malloc(size);
612    if (show_malloc)
613      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614    return block;
615  }  }
616    
617    static void stack_free(void *block)
618    {
619    if (show_malloc)
620      fprintf(outfile, "stack_free       %p\n", block);
621    free(block);
622    }
623    
624    
625    /*************************************************
626    *          Call pcre_fullinfo()                  *
627    *************************************************/
628    
629    /* Get one piece of information from the pcre_fullinfo() function */
630    
631    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
632    {
633    int rc;
634    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
635      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
636    }
637    
638    
639    
640    /*************************************************
641    *         Byte flipping function                 *
642    *************************************************/
643    
644    static unsigned long int
645    byteflip(unsigned long int value, int n)
646    {
647    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
648    return ((value & 0x000000ff) << 24) |
649           ((value & 0x0000ff00) <<  8) |
650           ((value & 0x00ff0000) >>  8) |
651           ((value & 0xff000000) >> 24);
652    }
653    
654    
655    
656    
657    /*************************************************
658    *        Check match or recursion limit          *
659    *************************************************/
660    
661    static int
662    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663      int start_offset, int options, int *use_offsets, int use_size_offsets,
664      int flag, unsigned long int *limit, int errnumber, const char *msg)
665    {
666    int count;
667    int min = 0;
668    int mid = 64;
669    int max = -1;
670    
671    extra->flags |= flag;
672    
673    for (;;)
674      {
675      *limit = mid;
676    
677      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678        use_offsets, use_size_offsets);
679    
680      if (count == errnumber)
681        {
682        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683        min = mid;
684        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685        }
686    
687      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688                             count == PCRE_ERROR_PARTIAL)
689        {
690        if (mid == min + 1)
691          {
692          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693          break;
694          }
695        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696        max = mid;
697        mid = (min + mid)/2;
698        }
699      else break;    /* Some other error */
700      }
701    
702    extra->flags &= ~flag;
703    return count;
704    }
705    
706    
707    
708    /*************************************************
709    *         Case-independent strncmp() function    *
710    *************************************************/
711    
712    /*
713    Arguments:
714      s         first string
715      t         second string
716      n         number of characters to compare
717    
718    Returns:    < 0, = 0, or > 0, according to the comparison
719    */
720    
721    static int
722    strncmpic(uschar *s, uschar *t, int n)
723    {
724    while (n--)
725      {
726      int c = tolower(*s++) - tolower(*t++);
727      if (c) return c;
728      }
729    return 0;
730    }
731    
732    
733    
734    /*************************************************
735    *         Check newline indicator                *
736    *************************************************/
737    
738    /* This is used both at compile and run-time to check for <xxx> escapes, where
739    xxx is LF, CR, CRLF, ANYCRLF, ANY, BSR_ANYCRLF, or BSR_UNICODE. Print a message
740    and return 0 if there is no match.
741    
742    Arguments:
743      p           points after the leading '<'
744      f           file for error message
745    
746    Returns:      appropriate PCRE_NEWLINE_xxx or PCRE_BSR_xxx flag, or 0
747    */
748    
749    static int
750    check_newline(uschar *p, FILE *f)
751    {
752    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759    fprintf(f, "Unknown newline type at: <%s\n", p);
760    return 0;
761    }
762    
763    
764    
/*************************************************
*             Usage function                     *
*************************************************/

/* Write the command-line synopsis and the list of options to stdout. The
lines about readline(), -dfa, and -p depend on the SUPPORT_LIBREADLINE, NODFA,
and NOPOSIX build settings respectively. There are no arguments and no return
value. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
802    
803    
804    
805    /*************************************************
806    *                Main Program                    *
807    *************************************************/
808    
809  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
810  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 263  int main(int argc, char **argv) Line 815  int main(int argc, char **argv)
815  FILE *infile = stdin;  FILE *infile = stdin;
816  int options = 0;  int options = 0;
817  int study_options = 0;  int study_options = 0;
818    int default_find_match_limit = FALSE;
819  int op = 1;  int op = 1;
820  int timeit = 0;  int timeit = 0;
821    int timeitm = 0;
822  int showinfo = 0;  int showinfo = 0;
823    int showstore = 0;
824    int quiet = 0;
825    int size_offsets = 45;
826    int size_offsets_max;
827    int *offsets = NULL;
828    #if !defined NOPOSIX
829  int posix = 0;  int posix = 0;
830    #endif
831  int debug = 0;  int debug = 0;
832  int done = 0;  int done = 0;
833  unsigned char buffer[30000];  int all_use_dfa = 0;
834  unsigned char dbuffer[1024];  int yield = 0;
835    int stack_size;
836    
837    /* These vectors store, end-to-end, a list of captured substring names. Assume
838    that 1024 is plenty long enough for the few names we'll be testing. */
839    
840  /* Static so that new_malloc can use it. */  uschar copynames[1024];
841    uschar getnames[1024];
842    
843    uschar *copynamesptr;
844    uschar *getnamesptr;
845    
846    /* Get buffers from malloc() so that Electric Fence will check their misuse
847    when I am debugging. They grow automatically when very long lines are read. */
848    
849    buffer = (unsigned char *)malloc(buffer_size);
850    dbuffer = (unsigned char *)malloc(buffer_size);
851    pbuffer = (unsigned char *)malloc(buffer_size);
852    
853    /* The outfile variable is static so that new_malloc can use it. */
854    
855  outfile = stdout;  outfile = stdout;
856    
857    /* The following  _setmode() stuff is some Windows magic that tells its runtime
858    library to translate CRLF into a single LF character. At least, that's what
859    I've been told: never having used Windows I take this all on trust. Originally
860    it set 0x8000, but then I was advised that _O_BINARY was better. */
861    
862    #if defined(_WIN32) || defined(WIN32)
863    _setmode( _fileno( stdout ), _O_BINARY );
864    #endif
865    
866  /* Scan options */  /* Scan options */
867    
868  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
869    {    {
870    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
871    else if (strcmp(argv[op], "-t") == 0) timeit = 1;  
872      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
873        showstore = 1;
874      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
875      else if (strcmp(argv[op], "-b") == 0) debug = 1;
876    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
877    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
878      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
879    #if !defined NODFA
880      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
881    #endif
882      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
883          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
884            *endptr == 0))
885        {
886        op++;
887        argc--;
888        }
889      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
890        {
891        int both = argv[op][2] == 0;
892        int temp;
893        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
894                         *endptr == 0))
895          {
896          timeitm = temp;
897          op++;
898          argc--;
899          }
900        else timeitm = LOOPREPEAT;
901        if (both) timeit = timeitm;
902        }
903      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
904          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
905            *endptr == 0))
906        {
907    #if defined(_WIN32) || defined(WIN32)
908        printf("PCRE: -S not supported on this OS\n");
909        exit(1);
910    #else
911        int rc;
912        struct rlimit rlim;
913        getrlimit(RLIMIT_STACK, &rlim);
914        rlim.rlim_cur = stack_size * 1024 * 1024;
915        rc = setrlimit(RLIMIT_STACK, &rlim);
916        if (rc != 0)
917          {
918        printf("PCRE: setrlimit() failed with error %d\n", rc);
919        exit(1);
920          }
921        op++;
922        argc--;
923    #endif
924        }
925    #if !defined NOPOSIX
926    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
927    #endif
928      else if (strcmp(argv[op], "-C") == 0)
929        {
930        int rc;
931        unsigned long int lrc;
932        printf("PCRE version %s\n", pcre_version());
933        printf("Compiled with\n");
934        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
935        printf("  %sUTF-8 support\n", rc? "" : "No ");
936        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
937        printf("  %sUnicode properties support\n", rc? "" : "No ");
938        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
939        /* Note that these values are always the ASCII values, even
940        in EBCDIC environments. CR is 13 and NL is 10. */
941        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
942          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
943          (rc == -2)? "ANYCRLF" :
944          (rc == -1)? "ANY" : "???");
945        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
946        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
947                                         "all Unicode newlines");
948        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
949        printf("  Internal link size = %d\n", rc);
950        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
951        printf("  POSIX malloc threshold = %d\n", rc);
952        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
953        printf("  Default match limit = %ld\n", lrc);
954        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
955        printf("  Default recursion depth limit = %ld\n", lrc);
956        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
957        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
958        goto EXIT;
959        }
960      else if (strcmp(argv[op], "-help") == 0 ||
961               strcmp(argv[op], "--help") == 0)
962        {
963        usage();
964        goto EXIT;
965        }
966    else    else
967      {      {
968      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
969      return 1;      usage();
970        yield = 1;
971        goto EXIT;
972      }      }
973    op++;    op++;
974    argc--;    argc--;
975    }    }
976    
977    /* Get the store for the offsets vector, and remember what it was */
978    
979    size_offsets_max = size_offsets;
980    offsets = (int *)malloc(size_offsets_max * sizeof(int));
981    if (offsets == NULL)
982      {
983      printf("** Failed to get %d bytes of memory for offsets vector\n",
984        (int)(size_offsets_max * sizeof(int)));
985      yield = 1;
986      goto EXIT;
987      }
988    
989  /* Sort out the input and output files */  /* Sort out the input and output files */
990    
991  if (argc > 1)  if (argc > 1)
992    {    {
993    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
994    if (infile == NULL)    if (infile == NULL)
995      {      {
996      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
997      return 1;      yield = 1;
998        goto EXIT;
999      }      }
1000    }    }
1001    
1002  if (argc > 2)  if (argc > 2)
1003    {    {
1004    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1005    if (outfile == NULL)    if (outfile == NULL)
1006      {      {
1007      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1008      return 1;      yield = 1;
1009        goto EXIT;
1010      }      }
1011    }    }
1012    
1013  /* Set alternative malloc function */  /* Set alternative malloc function */
1014    
1015  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1016    pcre_free = new_free;
1017    pcre_stack_malloc = stack_malloc;
1018    pcre_stack_free = stack_free;
1019    
1020  /* Heading line, then prompt for first re if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1021    
1022  fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
 fprintf(outfile, "PCRE version %s\n\n", pcre_version());  
1023    
1024  /* Main loop */  /* Main loop */
1025    
# Line 331  while (!done) Line 1027  while (!done)
1027    {    {
1028    pcre *re = NULL;    pcre *re = NULL;
1029    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
1030    
1031    #if !defined NOPOSIX  /* There are still compilers that require no indent */
1032    regex_t preg;    regex_t preg;
   const char *error;  
   unsigned char *p, *pp;  
   int do_study = 0;  
   int do_debug = 0;  
1033    int do_posix = 0;    int do_posix = 0;
1034    int erroroffset, len, delimiter;  #endif
1035    
1036    if (infile == stdin) printf("  re> ");    const char *error;
1037    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    unsigned char *p, *pp, *ppp;
1038    if (infile != stdin) fprintf(outfile, (char *)buffer);    unsigned char *to_file = NULL;
1039      const unsigned char *tables = NULL;
1040      unsigned long int true_size, true_study_size = 0;
1041      size_t size, regex_gotten_store;
1042      int do_study = 0;
1043      int do_debug = debug;
1044      int do_G = 0;
1045      int do_g = 0;
1046      int do_showinfo = showinfo;
1047      int do_showrest = 0;
1048      int do_flip = 0;
1049      int erroroffset, len, delimiter, poffset;
1050    
1051      use_utf8 = 0;
1052      debug_lengths = 1;
1053    
1054      if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
1055      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1056      fflush(outfile);
1057    
1058    p = buffer;    p = buffer;
1059    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1060    if (*p == 0) continue;    if (*p == 0) continue;
1061    
1062    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1063    complete, read more. */  
1064      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1065        {
1066        unsigned long int magic, get_options;
1067        uschar sbuf[8];
1068        FILE *f;
1069    
1070        p++;
1071        pp = p + (int)strlen((char *)p);
1072        while (isspace(pp[-1])) pp--;
1073        *pp = 0;
1074    
1075        f = fopen((char *)p, "rb");
1076        if (f == NULL)
1077          {
1078          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1079          continue;
1080          }
1081    
1082        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1083    
1084        true_size =
1085          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1086        true_study_size =
1087          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1088    
1089        re = (real_pcre *)new_malloc(true_size);
1090        regex_gotten_store = gotten_store;
1091    
1092        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1093    
1094        magic = ((real_pcre *)re)->magic_number;
1095        if (magic != MAGIC_NUMBER)
1096          {
1097          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1098            {
1099            do_flip = 1;
1100            }
1101          else
1102            {
1103            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1104            fclose(f);
1105            continue;
1106            }
1107          }
1108    
1109        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1110          do_flip? " (byte-inverted)" : "", p);
1111    
1112        /* Need to know if UTF-8 for printing data strings */
1113    
1114        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1115        use_utf8 = (get_options & PCRE_UTF8) != 0;
1116    
1117        /* Now see if there is any following study data */
1118    
1119        if (true_study_size != 0)
1120          {
1121          pcre_study_data *psd;
1122    
1123          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1124          extra->flags = PCRE_EXTRA_STUDY_DATA;
1125    
1126          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1127          extra->study_data = psd;
1128    
1129          if (fread(psd, 1, true_study_size, f) != true_study_size)
1130            {
1131            FAIL_READ:
1132            fprintf(outfile, "Failed to read data from %s\n", p);
1133            if (extra != NULL) new_free(extra);
1134            if (re != NULL) new_free(re);
1135            fclose(f);
1136            continue;
1137            }
1138          fprintf(outfile, "Study data loaded from %s\n", p);
1139          do_study = 1;     /* To get the data output if requested */
1140          }
1141        else fprintf(outfile, "No study data\n");
1142    
1143        fclose(f);
1144        goto SHOW_INFO;
1145        }
1146    
1147      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1148      the pattern; if it isn't complete, read more. */
1149    
1150    delimiter = *p++;    delimiter = *p++;
1151    
1152    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
1153      {      {
1154      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1155      goto SKIP_DATA;      goto SKIP_DATA;
1156      }      }
1157    
1158    pp = p;    pp = p;
1159      poffset = p - buffer;
1160    
1161    for(;;)    for(;;)
1162      {      {
1163      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
     if (*pp != 0) break;  
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
1164        {        {
1165        fprintf(outfile, "** Expression too long - missing delimiter?\n");        if (*pp == '\\' && pp[1] != 0) pp++;
1166        goto SKIP_DATA;          else if (*pp == delimiter) break;
1167          pp++;
1168        }        }
1169        if (*pp != 0) break;
1170      if (infile == stdin) printf("    > ");      if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     if (fgets((char *)pp, len, infile) == NULL)  
1171        {        {
1172        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1173        done = 1;        done = 1;
1174        goto CONTINUE;        goto CONTINUE;
1175        }        }
1176      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1177      }      }
1178    
1179    /* Terminate the pattern at the delimiter */    /* The buffer may have moved while being extended; reset the start of data
1180      pointer to the correct relative point in the buffer. */
1181    
1182      p = buffer + poffset;
1183    
1184      /* If the first character after the delimiter is backslash, make
1185      the pattern end with backslash. This is purely to provide a way
1186      of testing for the error message when a pattern ends with backslash. */
1187    
1188      if (pp[1] == '\\') *pp++ = '\\';
1189    
1190      /* Terminate the pattern at the delimiter, and save a copy of the pattern
1191      for callouts. */
1192    
1193    *pp++ = 0;    *pp++ = 0;
1194      strcpy((char *)pbuffer, (char *)p);
1195    
1196    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1197    
1198    options = 0;    options = 0;
1199    study_options = 0;    study_options = 0;
1200      log_store = showstore;  /* default from command line */
1201    
1202    while (*pp != 0)    while (*pp != 0)
1203      {      {
1204      switch (*pp++)      switch (*pp++)
1205        {        {
1206          case 'f': options |= PCRE_FIRSTLINE; break;
1207          case 'g': do_g = 1; break;
1208        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1209        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1210        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1211        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1212    
1213          case '+': do_showrest = 1; break;
1214        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1215        case 'D': do_debug = 1; break;        case 'B': do_debug = 1; break;
1216          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1217          case 'D': do_debug = do_showinfo = 1; break;
1218        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1219          case 'F': do_flip = 1; break;
1220          case 'G': do_G = 1; break;
1221          case 'I': do_showinfo = 1; break;
1222          case 'J': options |= PCRE_DUPNAMES; break;
1223          case 'M': log_store = 1; break;
1224          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1225    
1226    #if !defined NOPOSIX
1227        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1228    #endif
1229    
1230        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
       case 'I': study_options |= PCRE_CASELESS; break;  
1231        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1232        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1233        case '\n': case ' ': break;        case 'Z': debug_lengths = 0; break;
1234          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1235          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1236    
1237          case 'L':
1238          ppp = pp;
1239          /* The '\r' test here is so that it works on Windows. */
1240          /* The '0' test is just in case this is an unterminated line. */
1241          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1242          *ppp = 0;
1243          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1244            {
1245            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1246            goto SKIP_DATA;
1247            }
1248          locale_set = 1;
1249          tables = pcre_maketables();
1250          pp = ppp;
1251          break;
1252    
1253          case '>':
1254          to_file = pp;
1255          while (*pp != 0) pp++;
1256          while (isspace(pp[-1])) pp--;
1257          *pp = 0;
1258          break;
1259    
1260          case '<':
1261            {
1262            if (strncmp((char *)pp, "JS>", 3) == 0)
1263              {
1264              options |= PCRE_JAVASCRIPT_COMPAT;
1265              pp += 3;
1266              }
1267            else
1268              {
1269              int x = check_newline(pp, outfile);
1270              if (x == 0) goto SKIP_DATA;
1271              options |= x;
1272              while (*pp++ != '>');
1273              }
1274            }
1275          break;
1276    
1277          case '\r':                      /* So that it works in Windows */
1278          case '\n':
1279          case ' ':
1280          break;
1281    
1282        default:        default:
1283        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1284        goto SKIP_DATA;        goto SKIP_DATA;
# Line 414  while (!done) Line 1286  while (!done)
1286      }      }
1287    
1288    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1289    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
1290      local character tables. */
1291    
1292    #if !defined NOPOSIX
1293    if (posix || do_posix)    if (posix || do_posix)
1294      {      {
1295      int rc;      int rc;
1296      int cflags = 0;      int cflags = 0;
1297    
1298      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1299      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1300        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1301        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1302        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1303    
1304      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1305    
1306      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 429  while (!done) Line 1308  while (!done)
1308    
1309      if (rc != 0)      if (rc != 0)
1310        {        {
1311        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1312        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1313        goto SKIP_DATA;        goto SKIP_DATA;
1314        }        }
# Line 438  while (!done) Line 1317  while (!done)
1317    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1318    
1319    else    else
1320    #endif  /* !defined NOPOSIX */
1321    
1322      {      {
1323      if (timeit)      if (timeit > 0)
1324        {        {
1325        register int i;        register int i;
1326        clock_t time_taken;        clock_t time_taken;
1327        clock_t start_time = clock();        clock_t start_time = clock();
1328        for (i = 0; i < 4000; i++)        for (i = 0; i < timeit; i++)
1329          {          {
1330          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1331          if (re != NULL) free(re);          if (re != NULL) free(re);
1332          }          }
1333        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1334        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1335          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)timeit) /
1336              (double)CLOCKS_PER_SEC);
1337        }        }
1338    
1339      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1340    
1341      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1342      if non-interactive. */      if non-interactive. */
# Line 467  while (!done) Line 1349  while (!done)
1349          {          {
1350          for (;;)          for (;;)
1351            {            {
1352            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1353              {              {
1354              done = 1;              done = 1;
1355              goto CONTINUE;              goto CONTINUE;
# Line 478  while (!done) Line 1360  while (!done)
1360            }            }
1361          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1362          }          }
1363        continue;        goto CONTINUE;
1364          }
1365    
1366        /* Compilation succeeded; print data if required. There are now two
1367        info-returning functions. The old one has a limited interface and
1368        returns only limited data. Check that it agrees with the newer one. */
1369    
1370        if (log_store)
1371          fprintf(outfile, "Memory allocation (code space): %d\n",
1372            (int)(gotten_store -
1373                  sizeof(real_pcre) -
1374                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1375    
1376        /* Extract the size for possible writing before possibly flipping it,
1377        and remember the store that was got. */
1378    
1379        true_size = ((real_pcre *)re)->size;
1380        regex_gotten_store = gotten_store;
1381    
1382        /* If /S was present, study the regexp to generate additional info to
1383        help with the matching. */
1384    
1385        if (do_study)
1386          {
1387          if (timeit > 0)
1388            {
1389            register int i;
1390            clock_t time_taken;
1391            clock_t start_time = clock();
1392            for (i = 0; i < timeit; i++)
1393              extra = pcre_study(re, study_options, &error);
1394            time_taken = clock() - start_time;
1395            if (extra != NULL) free(extra);
1396            fprintf(outfile, "  Study time %.4f milliseconds\n",
1397              (((double)time_taken * 1000.0) / (double)timeit) /
1398                (double)CLOCKS_PER_SEC);
1399            }
1400          extra = pcre_study(re, study_options, &error);
1401          if (error != NULL)
1402            fprintf(outfile, "Failed to study: %s\n", error);
1403          else if (extra != NULL)
1404            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1405          }
1406    
1407        /* If the 'F' option was present, we flip the bytes of all the integer
1408        fields in the regex data block and the study block. This is to make it
1409        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1410        compiled on a different architecture. */
1411    
1412        if (do_flip)
1413          {
1414          real_pcre *rre = (real_pcre *)re;
1415          rre->magic_number =
1416            byteflip(rre->magic_number, sizeof(rre->magic_number));
1417          rre->size = byteflip(rre->size, sizeof(rre->size));
1418          rre->options = byteflip(rre->options, sizeof(rre->options));
1419          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1420          rre->top_bracket =
1421            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1422          rre->top_backref =
1423            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1424          rre->first_byte =
1425            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1426          rre->req_byte =
1427            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1428          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1429            sizeof(rre->name_table_offset));
1430          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1431            sizeof(rre->name_entry_size));
1432          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1433            sizeof(rre->name_count));
1434    
1435          if (extra != NULL)
1436            {
1437            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1438            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1439            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1440            }
1441        }        }
1442    
1443      /* Compilation succeeded; print data if required */      /* Extract information from the compiled data if required */
1444    
1445      if (showinfo || do_debug)      SHOW_INFO:
1446    
1447        if (do_debug)
1448        {        {
1449        int first_char, count;        fprintf(outfile, "------------------------------------------------------------------\n");
1450          pcre_printint(re, outfile, debug_lengths);
1451          }
1452    
1453        if (debug || do_debug) print_internals(re);      if (do_showinfo)
1454          {
1455          unsigned long int get_options, all_options;
1456    #if !defined NOINFOCHECK
1457          int old_first_char, old_options, old_count;
1458    #endif
1459          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1460            hascrorlf;
1461          int nameentrysize, namecount;
1462          const uschar *nametable;
1463    
1464          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1465          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1466          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1467          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1468          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1469          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1470          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1471          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1472          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1473          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1474          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1475          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1476    
1477        count = pcre_info(re, &options, &first_char);  #if !defined NOINFOCHECK
1478          old_count = pcre_info(re, &old_options, &old_first_char);
1479        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1480          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
1481        else        else
1482          {          {
1483          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
1484          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1485            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
1486              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
1487              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
1488              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1489              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
1490              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
1491              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
1492              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1493              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
1494          if (first_char == -1)          }
1495            {  #endif
1496            fprintf(outfile, "First char at start or follows \\n\n");  
1497            }        if (size != regex_gotten_store) fprintf(outfile,
1498          else if (first_char < 0)          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1499            (int)size, (int)regex_gotten_store);
1500    
1501          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1502          if (backrefmax > 0)
1503            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1504    
1505          if (namecount > 0)
1506            {
1507            fprintf(outfile, "Named capturing subpatterns:\n");
1508            while (namecount-- > 0)
1509            {            {
1510            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1511                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1512                GET2(nametable, 0));
1513              nametable += nameentrysize;
1514            }            }
1515            }
1516    
1517          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1518          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1519    
1520          all_options = ((real_pcre *)re)->options;
1521          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1522    
1523          if (get_options == 0) fprintf(outfile, "No options\n");
1524            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1525              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1526              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1527              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1528              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1529              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1530              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1531              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1532              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1533              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1534              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1535              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1536              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1537              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1538              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1539              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1540    
1541          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1542    
1543          switch (get_options & PCRE_NEWLINE_BITS)
1544            {
1545            case PCRE_NEWLINE_CR:
1546            fprintf(outfile, "Forced newline sequence: CR\n");
1547            break;
1548    
1549            case PCRE_NEWLINE_LF:
1550            fprintf(outfile, "Forced newline sequence: LF\n");
1551            break;
1552    
1553            case PCRE_NEWLINE_CRLF:
1554            fprintf(outfile, "Forced newline sequence: CRLF\n");
1555            break;
1556    
1557            case PCRE_NEWLINE_ANYCRLF:
1558            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1559            break;
1560    
1561            case PCRE_NEWLINE_ANY:
1562            fprintf(outfile, "Forced newline sequence: ANY\n");
1563            break;
1564    
1565            default:
1566            break;
1567            }
1568    
1569          if (first_char == -1)
1570            {
1571            fprintf(outfile, "First char at start or follows newline\n");
1572            }
1573          else if (first_char < 0)
1574            {
1575            fprintf(outfile, "No first char\n");
1576            }
1577          else
1578            {
1579            int ch = first_char & 255;
1580            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1581              "" : " (caseless)";
1582            if (PRINTHEX(ch))
1583              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1584            else
1585              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1586            }
1587    
1588          if (need_char < 0)
1589            {
1590            fprintf(outfile, "No need char\n");
1591            }
1592          else
1593            {
1594            int ch = need_char & 255;
1595            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1596              "" : " (caseless)";
1597            if (PRINTHEX(ch))
1598              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1599            else
1600              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1601            }
1602    
1603          /* Don't output study size; at present it is in any case a fixed
1604          value, but it varies, depending on the computer architecture, and
1605          so messes up the test suite. (And with the /F option, it might be
1606          flipped.) */
1607    
1608          if (do_study)
1609            {
1610            if (extra == NULL)
1611              fprintf(outfile, "Study returned NULL\n");
1612          else          else
1613            {            {
1614            if (isprint(first_char))            uschar *start_bits = NULL;
1615              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1616    
1617              if (start_bits == NULL)
1618                fprintf(outfile, "No starting byte set\n");
1619            else            else
1620              fprintf(outfile, "First char = %d\n", first_char);              {
1621                int i;
1622                int c = 24;
1623                fprintf(outfile, "Starting byte set: ");
1624                for (i = 0; i < 256; i++)
1625                  {
1626                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1627                    {
1628                    if (c > 75)
1629                      {
1630                      fprintf(outfile, "\n  ");
1631                      c = 2;
1632                      }
1633                    if (PRINTHEX(i) && i != ' ')
1634                      {
1635                      fprintf(outfile, "%c ", i);
1636                      c += 2;
1637                      }
1638                    else
1639                      {
1640                      fprintf(outfile, "\\x%02x ", i);
1641                      c += 5;
1642                      }
1643                    }
1644                  }
1645                fprintf(outfile, "\n");
1646                }
1647            }            }
1648          }          }
1649        }        }
1650    
1651      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1652      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1653        the study length, in big-endian order. */
1654    
1655      if (do_study)      if (to_file != NULL)
1656        {        {
1657        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1658          if (f == NULL)
1659          {          {
1660          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < 4000; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.2f milliseconds\n",  
           ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
1661          }          }
1662          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
1663          {          {
1664          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1665          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (uschar)((true_size >> 24) & 255);
1666            fprintf(outfile, "No starting character set\n");          sbuf[1] = (uschar)((true_size >> 16) & 255);
1667            sbuf[2] = (uschar)((true_size >>  8) & 255);
1668            sbuf[3] = (uschar)((true_size) & 255);
1669    
1670            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1671            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1672            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1673            sbuf[7] = (uschar)((true_study_size) & 255);
1674    
1675            if (fwrite(sbuf, 1, 8, f) < 8 ||
1676                fwrite(re, 1, true_size, f) < true_size)
1677              {
1678              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1679              }
1680          else          else
1681            {            {
1682            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1683            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1684              {              {
1685              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1686                    true_study_size)
1687                {                {
1688                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1689                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1690                }                }
1691                else fprintf(outfile, "Study data written to %s\n", to_file);
1692    
1693              }              }
           fprintf(outfile, "\n");  
1694            }            }
1695            fclose(f);
1696          }          }
1697    
1698          new_free(re);
1699          if (extra != NULL) new_free(extra);
1700          if (tables != NULL) new_free((void *)tables);
1701          continue;  /* With next regex */
1702        }        }
1703      }      }        /* End of non-POSIX compile */
1704    
1705    /* Read data lines and test them */    /* Read data lines and test them */
1706    
1707    for (;;)    for (;;)
1708      {      {
1709      unsigned char *q;      uschar *q;
1710        uschar *bptr;
1711        int *use_offsets = offsets;
1712        int use_size_offsets = size_offsets;
1713        int callout_data = 0;
1714        int callout_data_set = 0;
1715      int count, c;      int count, c;
1716      int offsets[30];      int copystrings = 0;
1717      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = default_find_match_limit;
1718        int getstrings = 0;
1719        int getlist = 0;
1720        int gmatched = 0;
1721        int start_offset = 0;
1722        int g_notempty = 0;
1723        int use_dfa = 0;
1724    
1725      options = 0;      options = 0;
1726    
1727      if (infile == stdin) printf("  data> ");      *copynames = 0;
1728      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1729    
1730        copynamesptr = copynames;
1731        getnamesptr = getnames;
1732    
1733        pcre_callout = callout;
1734        first_callout = 1;
1735        callout_extra = 0;
1736        callout_count = 0;
1737        callout_fail_count = 999999;
1738        callout_fail_id = -1;
1739        show_malloc = 0;
1740    
1741        if (extra != NULL) extra->flags &=
1742          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1743    
1744        len = 0;
1745        for (;;)
1746        {        {
1747        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1748        goto CONTINUE;          {
1749            if (len > 0) break;
1750            done = 1;
1751            goto CONTINUE;
1752            }
1753          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1754          len = (int)strlen((char *)buffer);
1755          if (buffer[len-1] == '\n') break;
1756        }        }
     if (infile != stdin) fprintf(outfile, (char *)buffer);  
1757    
     len = (int)strlen((char *)buffer);  
1758      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1759      buffer[len] = 0;      buffer[len] = 0;
1760      if (len == 0) break;      if (len == 0) break;
# Line 614  while (!done) Line 1762  while (!done)
1762      p = buffer;      p = buffer;
1763      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1764    
1765      q = dbuffer;      bptr = q = dbuffer;
1766      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1767        {        {
1768        int i = 0;        int i = 0;
1769        int n = 0;        int n = 0;
1770    
1771        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1772          {          {
1773          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 635  while (!done) Line 1784  while (!done)
1784          c -= '0';          c -= '0';
1785          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1786            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1787    
1788    #if !defined NOUTF8
1789            if (use_utf8 && c > 255)
1790              {
1791              unsigned char buff8[8];
1792              int ii, utn;
1793              utn = ord2utf8(c, buff8);
1794              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1795              c = buff8[ii];   /* Last byte */
1796              }
1797    #endif
1798          break;          break;
1799    
1800          case 'x':          case 'x':
1801    
1802            /* Handle \x{..} specially - new Perl thing for utf8 */
1803    
1804    #if !defined NOUTF8
1805            if (*p == '{')
1806              {
1807              unsigned char *pt = p;
1808              c = 0;
1809              while (isxdigit(*(++pt)))
1810                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1811              if (*pt == '}')
1812                {
1813                unsigned char buff8[8];
1814                int ii, utn;
1815                if (use_utf8)
1816                  {
1817                  utn = ord2utf8(c, buff8);
1818                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1819                  c = buff8[ii];   /* Last byte */
1820                  }
1821                else
1822                 {
1823                 if (c > 255)
1824                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1825                     "UTF-8 mode is not enabled.\n"
1826                     "** Truncation will probably give the wrong result.\n", c);
1827                 }
1828                p = pt + 1;
1829                break;
1830                }
1831              /* Not correct form; fall through */
1832              }
1833    #endif
1834    
1835            /* Ordinary \x */
1836    
1837          c = 0;          c = 0;
1838          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1839            {            {
# Line 646  while (!done) Line 1842  while (!done)
1842            }            }
1843          break;          break;
1844    
1845          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1846          p--;          p--;
1847          continue;          continue;
1848    
1849            case '>':
1850            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1851            continue;
1852    
1853          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1854          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1855          continue;          continue;
# Line 658  while (!done) Line 1858  while (!done)
1858          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1859          continue;          continue;
1860    
1861          case 'E':          case 'C':
1862          options |= PCRE_DOLLAR_ENDONLY;          if (isdigit(*p))    /* Set copy string */
1863              {
1864              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1865              copystrings |= 1 << n;
1866              }
1867            else if (isalnum(*p))
1868              {
1869              uschar *npp = copynamesptr;
1870              while (isalnum(*p)) *npp++ = *p++;
1871              *npp++ = 0;
1872              *npp = 0;
1873              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1874              if (n < 0)
1875                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1876              copynamesptr = npp;
1877              }
1878            else if (*p == '+')
1879              {
1880              callout_extra = 1;
1881              p++;
1882              }
1883            else if (*p == '-')
1884              {
1885              pcre_callout = NULL;
1886              p++;
1887              }
1888            else if (*p == '!')
1889              {
1890              callout_fail_id = 0;
1891              p++;
1892              while(isdigit(*p))
1893                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1894              callout_fail_count = 0;
1895              if (*p == '!')
1896                {
1897                p++;
1898                while(isdigit(*p))
1899                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1900                }
1901              }
1902            else if (*p == '*')
1903              {
1904              int sign = 1;
1905              callout_data = 0;
1906              if (*(++p) == '-') { sign = -1; p++; }
1907              while(isdigit(*p))
1908                callout_data = callout_data * 10 + *p++ - '0';
1909              callout_data *= sign;
1910              callout_data_set = 1;
1911              }
1912            continue;
1913    
1914    #if !defined NODFA
1915            case 'D':
1916    #if !defined NOPOSIX
1917            if (posix || do_posix)
1918              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1919            else
1920    #endif
1921              use_dfa = 1;
1922            continue;
1923    
1924            case 'F':
1925            options |= PCRE_DFA_SHORTEST;
1926            continue;
1927    #endif
1928    
1929            case 'G':
1930            if (isdigit(*p))
1931              {
1932              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1933              getstrings |= 1 << n;
1934              }
1935            else if (isalnum(*p))
1936              {
1937              uschar *npp = getnamesptr;
1938              while (isalnum(*p)) *npp++ = *p++;
1939              *npp++ = 0;
1940              *npp = 0;
1941              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1942              if (n < 0)
1943                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1944              getnamesptr = npp;
1945              }
1946          continue;          continue;
1947    
1948          case 'I':          case 'L':
1949          options |= PCRE_CASELESS;          getlist = 1;
1950          continue;          continue;
1951    
1952          case 'M':          case 'M':
1953          options |= PCRE_MULTILINE;          find_match_limit = 1;
1954          continue;          continue;
1955    
1956          case 'S':          case 'N':
1957          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1958          continue;          continue;
1959    
1960          case 'O':          case 'O':
1961          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1962          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1963              {
1964              size_offsets_max = n;
1965              free(offsets);
1966              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1967              if (offsets == NULL)
1968                {
1969                printf("** Failed to get %d bytes of memory for offsets vector\n",
1970                  (int)(size_offsets_max * sizeof(int)));
1971                yield = 1;
1972                goto EXIT;
1973                }
1974              }
1975            use_size_offsets = n;
1976            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1977            continue;
1978    
1979            case 'P':
1980            options |= PCRE_PARTIAL;
1981            continue;
1982    
1983            case 'Q':
1984            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1985            if (extra == NULL)
1986              {
1987              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1988              extra->flags = 0;
1989              }
1990            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1991            extra->match_limit_recursion = n;
1992            continue;
1993    
1994            case 'q':
1995            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1996            if (extra == NULL)
1997              {
1998              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1999              extra->flags = 0;
2000              }
2001            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2002            extra->match_limit = n;
2003            continue;
2004    
2005    #if !defined NODFA
2006            case 'R':
2007            options |= PCRE_DFA_RESTART;
2008            continue;
2009    #endif
2010    
2011            case 'S':
2012            show_malloc = 1;
2013          continue;          continue;
2014    
2015            case 'Y':
2016            options |= PCRE_NO_START_OPTIMIZE;
2017            continue;
2018    
2019          case 'Z':          case 'Z':
2020          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2021          continue;          continue;
2022    
2023            case '?':
2024            options |= PCRE_NO_UTF8_CHECK;
2025            continue;
2026    
2027            case '<':
2028              {
2029              int x = check_newline(p, outfile);
2030              if (x == 0) goto NEXT_DATA;
2031              options |= x;
2032              while (*p++ != '>');
2033              }
2034            continue;
2035          }          }
2036        *q++ = c;        *q++ = c;
2037        }        }
2038      *q = 0;      *q = 0;
2039      len = q - dbuffer;      len = q - dbuffer;
2040    
2041        /* Move the data to the end of the buffer so that a read over the end of
2042        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2043        we are using the POSIX interface, we must include the terminating zero. */
2044    
2045    #if !defined NOPOSIX
2046        if (posix || do_posix)
2047          {
2048          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2049          bptr += buffer_size - len - 1;
2050          }
2051        else
2052    #endif
2053          {
2054          memmove(bptr + buffer_size - len, bptr, len);
2055          bptr += buffer_size - len;
2056          }
2057    
2058        if ((all_use_dfa || use_dfa) && find_match_limit)
2059          {
2060          printf("**Match limit not relevant for DFA matching: ignored\n");
2061          find_match_limit = 0;
2062          }
2063    
2064      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2065      support timing. */      support timing or playing with the match limit or callout data. */
2066    
2067    #if !defined NOPOSIX
2068      if (posix || do_posix)      if (posix || do_posix)
2069        {        {
2070        int rc;        int rc;
2071        int eflags = 0;        int eflags = 0;
2072        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
2073          if (use_size_offsets > 0)
2074            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2075        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2076        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2077          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2078    
2079        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
2080    
2081        if (rc != 0)        if (rc != 0)
2082          {          {
2083          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2084          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2085          }          }
2086          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2087                  != 0)
2088            {
2089            fprintf(outfile, "Matched with REG_NOSUB\n");
2090            }
2091        else        else
2092          {          {
2093          size_t i;          size_t i;
2094          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2095            {            {
2096            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2097              {              {
2098              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
2099              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2100                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2101              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2102                if (i == 0 && do_showrest)
2103                  {
2104                  fprintf(outfile, " 0+ ");
2105                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2106                    outfile);
2107                  fprintf(outfile, "\n");
2108                  }
2109              }              }
2110            }            }
2111          }          }
2112          free(pmatch);
2113        }        }
2114    
2115      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
2116    
2117      else      else
2118    #endif  /* !defined NOPOSIX */
2119    
2120        for (;; gmatched++)    /* Loop for /g or /G */
2121        {        {
2122        if (timeit)        if (timeitm > 0)
2123          {          {
2124          register int i;          register int i;
2125          clock_t time_taken;          clock_t time_taken;
2126          clock_t start_time = clock();          clock_t start_time = clock();
2127          for (i = 0; i < 4000; i++)  
2128            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
2129              size_offsets);          if (all_use_dfa || use_dfa)
2130              {
2131              int workspace[1000];
2132              for (i = 0; i < timeitm; i++)
2133                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2134                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2135                  sizeof(workspace)/sizeof(int));
2136              }
2137            else
2138    #endif
2139    
2140            for (i = 0; i < timeitm; i++)
2141              count = pcre_exec(re, extra, (char *)bptr, len,
2142                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2143    
2144          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2145          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2146            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)timeitm) /
2147                (double)CLOCKS_PER_SEC);
2148            }
2149    
2150          /* If find_match_limit is set, we want to do repeated matches with
2151          varying limits in order to find the minimum value for the match limit and
2152          for the recursion limit. */
2153    
2154          if (find_match_limit)
2155            {
2156            if (extra == NULL)
2157              {
2158              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2159              extra->flags = 0;
2160              }
2161    
2162            (void)check_match_limit(re, extra, bptr, len, start_offset,
2163              options|g_notempty, use_offsets, use_size_offsets,
2164              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2165              PCRE_ERROR_MATCHLIMIT, "match()");
2166    
2167            count = check_match_limit(re, extra, bptr, len, start_offset,
2168              options|g_notempty, use_offsets, use_size_offsets,
2169              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2170              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2171            }
2172    
2173          /* If callout_data is set, use the interface with additional data */
2174    
2175          else if (callout_data_set)
2176            {
2177            if (extra == NULL)
2178              {
2179              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2180              extra->flags = 0;
2181              }
2182            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2183            extra->callout_data = &callout_data;
2184            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2185              options | g_notempty, use_offsets, use_size_offsets);
2186            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2187          }          }
2188    
2189        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
2190          size_offsets);        value of match_limit. */
2191    
2192    #if !defined NODFA
2193          else if (all_use_dfa || use_dfa)
2194            {
2195            int workspace[1000];
2196            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2197              options | g_notempty, use_offsets, use_size_offsets, workspace,
2198              sizeof(workspace)/sizeof(int));
2199            if (count == 0)
2200              {
2201              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2202              count = use_size_offsets/2;
2203              }
2204            }
2205    #endif
2206    
2207        if (count == 0)        else
2208          {          {
2209          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2210          count = size_offsets/2;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2211            if (count == 0)
2212              {
2213              fprintf(outfile, "Matched, but too many substrings\n");
2214              count = use_size_offsets/3;
2215              }
2216          }          }
2217    
2218          /* Matched */
2219    
2220        if (count >= 0)        if (count >= 0)
2221          {          {
2222          int i;          int i, maxcount;
2223          count *= 2;  
2224          for (i = 0; i < count; i += 2)  #if !defined NODFA
2225            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2226    #endif
2227              maxcount = use_size_offsets/3;
2228    
2229            /* This is a check against a lunatic return value. */
2230    
2231            if (count > maxcount)
2232              {
2233              fprintf(outfile,
2234                "** PCRE error: returned count %d is too big for offset size %d\n",
2235                count, use_size_offsets);
2236              count = use_size_offsets/3;
2237              if (do_g || do_G)
2238                {
2239                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2240                do_g = do_G = FALSE;        /* Break g/G loop */
2241                }
2242              }
2243    
2244            for (i = 0; i < count * 2; i += 2)
2245            {            {
2246            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2247              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2248            else            else
2249              {              {
2250              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2251              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2252                  use_offsets[i+1] - use_offsets[i], outfile);
2253              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2254                if (i == 0)
2255                  {
2256                  if (do_showrest)
2257                    {
2258                    fprintf(outfile, " 0+ ");
2259                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2260                      outfile);
2261                    fprintf(outfile, "\n");
2262                    }
2263                  }
2264                }
2265              }
2266    
2267            for (i = 0; i < 32; i++)
2268              {
2269              if ((copystrings & (1 << i)) != 0)
2270                {
2271                char copybuffer[256];
2272                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2273                  i, copybuffer, sizeof(copybuffer));
2274                if (rc < 0)
2275                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2276                else
2277                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2278                }
2279              }
2280    
2281            for (copynamesptr = copynames;
2282                 *copynamesptr != 0;
2283                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2284              {
2285              char copybuffer[256];
2286              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2287                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2288              if (rc < 0)
2289                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2290              else
2291                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2292              }
2293    
2294            for (i = 0; i < 32; i++)
2295              {
2296              if ((getstrings & (1 << i)) != 0)
2297                {
2298                const char *substring;
2299                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2300                  i, &substring);
2301                if (rc < 0)
2302                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2303                else
2304                  {
2305                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2306                  pcre_free_substring(substring);
2307                  }
2308                }
2309              }
2310    
2311            for (getnamesptr = getnames;
2312                 *getnamesptr != 0;
2313                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2314              {
2315              const char *substring;
2316              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2317                count, (char *)getnamesptr, &substring);
2318              if (rc < 0)
2319                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2320              else
2321                {
2322                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2323                pcre_free_substring(substring);
2324              }              }
2325            }            }
2326    
2327            if (getlist)
2328              {
2329              const char **stringlist;
2330              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2331                &stringlist);
2332              if (rc < 0)
2333                fprintf(outfile, "get substring list failed %d\n", rc);
2334              else
2335                {
2336                for (i = 0; i < count; i++)
2337                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2338                if (stringlist[i] != NULL)
2339                  fprintf(outfile, "string list not terminated by NULL\n");
2340                /* free((void *)stringlist); */
2341                pcre_free_substring_list(stringlist);
2342                }
2343              }
2344            }
2345    
2346          /* There was a partial match */
2347    
2348          else if (count == PCRE_ERROR_PARTIAL)
2349            {
2350            fprintf(outfile, "Partial match");
2351    #if !defined NODFA
2352            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2353              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2354                bptr + use_offsets[0]);
2355    #endif
2356            fprintf(outfile, "\n");
2357            break;  /* Out of the /g loop */
2358          }          }
2359    
2360          /* Failed to match. If this is a /g or /G loop and we previously set
2361          g_notempty after a null match, this is not necessarily the end. We want
2362          to advance the start offset, and continue. We won't be at the end of the
2363          string - that was checked before setting g_notempty.
2364    
2365          Complication arises in the case when the newline option is "any" or
2366          "anycrlf". If the previous match was at the end of a line terminated by
2367          CRLF, an advance of one character just passes the \r, whereas we should
2368          prefer the longer newline sequence, as does the code in pcre_exec().
2369          Fudge the offset value to achieve this.
2370    
2371          Otherwise, in the case of UTF-8 matching, the advance must be one
2372          character, not one byte. */
2373    
2374        else        else
2375          {          {
2376          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2377              {
2378              int onechar = 1;
2379              unsigned int obits = ((real_pcre *)re)->options;
2380              use_offsets[0] = start_offset;
2381              if ((obits & PCRE_NEWLINE_BITS) == 0)
2382                {
2383                int d;
2384                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2385                /* Note that these values are always the ASCII ones, even in
2386                EBCDIC environments. CR = 13, NL = 10. */
2387                obits = (d == 13)? PCRE_NEWLINE_CR :
2388                        (d == 10)? PCRE_NEWLINE_LF :
2389                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2390                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2391                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2392                }
2393              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2394                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2395                  &&
2396                  start_offset < len - 1 &&
2397                  bptr[start_offset] == '\r' &&
2398                  bptr[start_offset+1] == '\n')
2399                onechar++;
2400              else if (use_utf8)
2401                {
2402                while (start_offset + onechar < len)
2403                  {
2404                  int tb = bptr[start_offset+onechar];
2405                  if (tb <= 127) break;
2406                  tb &= 0xc0;
2407                  if (tb != 0 && tb != 0xc0) onechar++;
2408                  }
2409                }
2410              use_offsets[1] = start_offset + onechar;
2411              }
2412            else
2413              {
2414              if (count == PCRE_ERROR_NOMATCH)
2415                {
2416                if (gmatched == 0) fprintf(outfile, "No match\n");
2417                }
2418            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2419              break;  /* Out of the /g loop */
2420              }
2421          }          }
2422        }  
2423      }        /* If not /g or /G we are done */
2424    
2425          if (!do_g && !do_G) break;
2426    
2427          /* If we have matched an empty string, first check to see if we are at
2428          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2429          what Perl's /g option does. This turns out to be rather cunning. First
2430          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2431          same point. If this fails (picked up above) we advance to the next
2432          character. */
2433    
2434          g_notempty = 0;
2435    
2436          if (use_offsets[0] == use_offsets[1])
2437            {
2438            if (use_offsets[0] == len) break;
2439            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2440            }
2441    
2442          /* For /g, update the start offset, leaving the rest alone */
2443    
2444          if (do_g) start_offset = use_offsets[1];
2445    
2446          /* For /G, update the pointer and length */
2447    
2448          else
2449            {
2450            bptr += use_offsets[1];
2451            len -= use_offsets[1];
2452            }
2453          }  /* End of loop for /g and /G */
2454    
2455        NEXT_DATA: continue;
2456        }    /* End of loop for data lines */
2457    
2458    CONTINUE:    CONTINUE:
2459    
2460    #if !defined NOPOSIX
2461    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2462    if (re != NULL) free(re);  #endif
2463    if (extra != NULL) free(extra);  
2464      if (re != NULL) new_free(re);
2465      if (extra != NULL) new_free(extra);
2466      if (tables != NULL)
2467        {
2468        new_free((void *)tables);
2469        setlocale(LC_CTYPE, "C");
2470        locale_set = 0;
2471        }
2472    }    }
2473    
2474  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2475  return 0;  
2476    EXIT:
2477    
2478    if (infile != NULL && infile != stdin) fclose(infile);
2479    if (outfile != NULL && outfile != stdout) fclose(outfile);
2480    
2481    free(buffer);
2482    free(dbuffer);
2483    free(pbuffer);
2484    free(offsets);
2485    
2486    return yield;
2487  }  }
2488    
2489  /* End */  /* End of pcretest.c */

Legend:
Removed from v.19  
changed lines
  Added in v.391

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12