/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 41 by nigel, Sat Feb 24 21:39:17 2007 UTC revision 289 by ph10, Sun Dec 23 12:17:20 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #include <unistd.h>
53    #include <readline/readline.h>
54    #include <readline/history.h>
55    #endif
56    
57    
58    /* A number of things vary for Windows builds. Originally, pcretest opened its
59    input and output without "b"; then I was told that "b" was needed in some
60    environments, so it was added for release 5.0 to both the input and output. (It
61    makes no difference on Unix-like systems.) Later I was told that it is wrong
62    for the input on Windows. I've now abstracted the modes into two macros that
63    are set here, to make it easier to fiddle with them, and removed "b" from the
64    input mode under Windows. */
65    
66    #if defined(_WIN32) || defined(WIN32)
67    #include <io.h>                /* For _setmode() */
68    #include <fcntl.h>             /* For _O_BINARY */
69    #define INPUT_MODE   "r"
70    #define OUTPUT_MODE  "wb"
71    
72    #else
73    #include <sys/time.h>          /* These two includes are needed */
74    #include <sys/resource.h>      /* for setrlimit(). */
75    #define INPUT_MODE   "rb"
76    #define OUTPUT_MODE  "wb"
77    #endif
78    
79    
80  /* Use the internal info for displaying the results of pcre_study(). */  /* We have to include pcre_internal.h because we need the internal info for
81    displaying the results of pcre_study() and we also need to know about the
82    internal macros, structures, and other internal data values; pcretest has
83    "inside information" compared to a program that strictly follows the PCRE API.
84    
85    Although pcre_internal.h does itself include pcre.h, we explicitly include it
86    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
87    appropriately for an application, not for building PCRE. */
88    
89    #include "pcre.h"
90    #include "pcre_internal.h"
91    
92    /* We need access to the data tables that PCRE uses. So as not to have to keep
93    two copies, we include the source file here, changing the names of the external
94    symbols to prevent clashes. */
95    
96    #define _pcre_utf8_table1      utf8_table1
97    #define _pcre_utf8_table1_size utf8_table1_size
98    #define _pcre_utf8_table2      utf8_table2
99    #define _pcre_utf8_table3      utf8_table3
100    #define _pcre_utf8_table4      utf8_table4
101    #define _pcre_utt              utt
102    #define _pcre_utt_size         utt_size
103    #define _pcre_utt_names        utt_names
104    #define _pcre_OP_lengths       OP_lengths
105    
106    #include "pcre_tables.c"
107    
108    /* We also need the pcre_printint() function for printing out compiled
109    patterns. This function is in a separate file so that it can be included in
110    pcre_compile.c when that module is compiled with debugging enabled.
111    
112    The definition of the macro PRINTABLE, which determines whether to print an
113    output character as-is or as a hex value when showing compiled patterns, is
114    contained in this file. We use it here also, in cases when the locale has not
115    been explicitly changed, so as to get consistent output from systems that
116    differ in their output from isprint() even in the "C" locale. */
117    
118    #include "pcre_printint.src"
119    
120    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
121    
 #include "internal.h"  
122    
123  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
124  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 21  Makefile. */ Line 128  Makefile. */
128  #include "pcreposix.h"  #include "pcreposix.h"
129  #endif  #endif
130    
131    /* It is also possible, for the benefit of the version currently imported into
132    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
133    interface to the DFA matcher (NODFA), and without the doublecheck of the old
134    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
135    UTF8 support if PCRE is built without it. */
136    
137    #ifndef SUPPORT_UTF8
138    #ifndef NOUTF8
139    #define NOUTF8
140    #endif
141    #endif
142    
143    
144    /* Other parameters */
145    
146  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
147  #ifdef CLK_TCK  #ifdef CLK_TCK
148  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 29  Makefile. */ Line 151  Makefile. */
151  #endif  #endif
152  #endif  #endif
153    
154  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
155    
156    #define LOOPREPEAT 500000
157    
158    /* Static variables */
159    
160  static FILE *outfile;  static FILE *outfile;
161  static int log_store = 0;  static int log_store = 0;
162    static int callout_count;
163    static int callout_extra;
164    static int callout_fail_count;
165    static int callout_fail_id;
166    static int debug_lengths;
167    static int first_callout;
168    static int locale_set = 0;
169    static int show_malloc;
170    static int use_utf8;
171    static size_t gotten_store;
172    
173    /* The buffers grow automatically if very long input lines are encountered. */
174    
175    static int buffer_size = 50000;
176    static uschar *buffer = NULL;
177    static uschar *dbuffer = NULL;
178    static uschar *pbuffer = NULL;
179    
180    
181    
182    /*************************************************
183    *        Read or extend an input line            *
184    *************************************************/
185    
186    /* Input lines are read into buffer, but both patterns and data lines can be
187    continued over multiple input lines. In addition, if the buffer fills up, we
188    want to automatically expand it so as to be able to handle extremely large
189    lines that are needed for certain stress tests. When the input buffer is
190    expanded, the other two buffers must also be expanded likewise, and the
191    contents of pbuffer, which are a copy of the input for callouts, must be
192    preserved (for when expansion happens for a data line). This is not the most
193    optimal way of handling this, but hey, this is just a test program!
194    
195    Arguments:
196      f            the file to read
197      start        where in buffer to start (this *must* be within buffer)
198      prompt       for stdin or readline()
199    
200    Returns:       pointer to the start of new data
201                   could be a copy of start, or could be moved
202                   NULL if no data read and EOF reached
203    */
204    
205    static uschar *
206    extend_inputline(FILE *f, uschar *start, const char *prompt)
207    {
208    uschar *here = start;
209    
210    for (;;)
211      {
212      int rlen = buffer_size - (here - buffer);
213    
214      if (rlen > 1000)
215        {
216        int dlen;
217    
218        /* If libreadline support is required, use readline() to read a line if the
219        input is a terminal. Note that readline() removes the trailing newline, so
220        we must put it back again, to be compatible with fgets(). */
221    
222    #ifdef SUPPORT_LIBREADLINE
223        if (isatty(fileno(f)))
224          {
225          size_t len;
226          char *s = readline(prompt);
227          if (s == NULL) return (here == start)? NULL : start;
228          len = strlen(s);
229          if (len > 0) add_history(s);
230          if (len > rlen - 1) len = rlen - 1;
231          memcpy(here, s, len);
232          here[len] = '\n';
233          here[len+1] = 0;
234          free(s);
235          }
236        else
237    #endif
238    
239        /* Read the next line by normal means, prompting if the file is stdin. */
240    
241          {
242          if (f == stdin) printf(prompt);
243          if (fgets((char *)here, rlen,  f) == NULL)
244            return (here == start)? NULL : start;
245          }
246    
247        dlen = (int)strlen((char *)here);
248        if (dlen > 0 && here[dlen - 1] == '\n') return start;
249        here += dlen;
250        }
251    
252      else
253        {
254        int new_buffer_size = 2*buffer_size;
255        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
256        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
257        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
258    
259        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
260          {
261          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
262          exit(1);
263          }
264    
265        memcpy(new_buffer, buffer, buffer_size);
266        memcpy(new_pbuffer, pbuffer, buffer_size);
267    
268        buffer_size = new_buffer_size;
269    
270        start = new_buffer + (start - buffer);
271        here = new_buffer + (here - buffer);
272    
273        free(buffer);
274        free(dbuffer);
275        free(pbuffer);
276    
277        buffer = new_buffer;
278        dbuffer = new_dbuffer;
279        pbuffer = new_pbuffer;
280        }
281      }
282    
283    return NULL;  /* Control never gets here */
284    }
285    
286    
287    
288    
289    
290    
291    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple. Leading white space is
skipped and then decimal digits are accumulated. A value that is too large for
an int is clamped to the largest int rather than being allowed to overflow.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (the first unconsumed character)

Returns:        the value as a non-negative int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
const int ceiling = (int)(~0u >> 1);   /* Largest int, without <limits.h> */
int result = 0;

while (isspace(*str)) str++;

while (isdigit(*str))
  {
  int digit = *str++ - '0';

  /* result*10 + digit fits only if result <= (ceiling - digit)/10. Once
  clamped, the value stays clamped while the remaining digits are consumed. */

  if (result > (ceiling - digit) / 10) result = ceiling;
    else result = result * 10 + digit;
  }

*endptr = str;
return result;
}
316    
317    
318    
319    
320    /*************************************************
321    *            Convert UTF-8 string to value       *
322    *************************************************/
323    
324    /* This function takes one or more bytes that represents a UTF-8 character,
325    and returns the value of the character.
326    
327  /* Debugging function to print the internal form of the regex. This is the same  Argument:
328  code as contained in pcre.c under the DEBUG macro. */    utf8bytes   a pointer to the byte vector
329      vptr        a pointer to an int to receive the value
330    
331  static const char *OP_names[] = {  Returns:      >  0 => the number of bytes consumed
332    "End", "\\A", "\\B", "\\b", "\\D", "\\d",                -6 to 0 => malformed UTF-8 character at offset = (-return)
333    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  */
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
334    
335    #if !defined NOUTF8
336    
337  static void print_internals(pcre *re)  static int
338    utf82ord(unsigned char *utf8bytes, int *vptr)
339  {  {
340  unsigned char *code = ((real_pcre *)re)->code;  int c = *utf8bytes++;
341    int d = c;
342    int i, j, s;
343    
344  fprintf(outfile, "------------------------------------------------------------------\n");  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
345      {
346      if ((d & 0x80) == 0) break;
347      d <<= 1;
348      }
349    
350    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
351    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
352    
353  for(;;)  /* i now has a value in the range 1-5 */
354    
355    s = 6*i;
356    d = (c & utf8_table3[i]) << s;
357    
358    for (j = 0; j < i; j++)
359    {    {
360    int c;    c = *utf8bytes++;
361    int charlength;    if ((c & 0xc0) != 0x80) return -(j+1);
362      s -= 6;
363      d |= (c & 0x3f) << s;
364      }
365    
366    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  /* Check that encoding was the correct unique one */
367    
368    if (*code >= OP_BRA)  for (j = 0; j < utf8_table1_size; j++)
369      {    if (d <= utf8_table1[j]) break;
370      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  if (j != i) return -(i+1);
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
371    
372        CLASS_REF_REPEAT:  /* Valid value */
373    
374        switch(*code)  *vptr = d;
375          {  return i+1;
376          case OP_CRSTAR:  }
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
377    
378          case OP_CRRANGE:  #endif
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
379    
380          default:  
381          code--;  
382    /*************************************************
383    *       Convert character value to UTF-8         *
384    *************************************************/
385    
386    /* This function takes an integer value in the range 0 - 0x7fffffff
387    and encodes it as a UTF-8 character in 0 to 6 bytes.
388    
389    Arguments:
390      cvalue     the character value
391      utf8bytes  pointer to buffer for result - at least 6 bytes long
392    
393    Returns:     number of characters placed in the buffer
394    */
395    
396    #if !defined NOUTF8
397    
398    static int
399    ord2utf8(int cvalue, uschar *utf8bytes)
400    {
401    register int i, j;
402    for (i = 0; i < utf8_table1_size; i++)
403      if (cvalue <= utf8_table1[i]) break;
404    utf8bytes += i;
405    for (j = i; j > 0; j--)
406     {
407     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
408     cvalue >>= 6;
409     }
410    *utf8bytes = utf8_table2[i] | cvalue;
411    return i + 1;
412    }
413    
414    #endif
415    
416    
417    
418    /*************************************************
419    *             Print character string             *
420    *************************************************/
421    
422    /* Character string printing function. Must handle UTF-8 strings in utf8
423    mode. Yields number of characters printed. If handed a NULL file, just counts
424    chars without printing. */
425    
426    static int pchars(unsigned char *p, int length, FILE *f)
427    {
428    int c = 0;
429    int yield = 0;
430    
431    while (length-- > 0)
432      {
433    #if !defined NOUTF8
434      if (use_utf8)
435        {
436        int rc = utf82ord(p, &c);
437    
438        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
439          {
440          length -= rc - 1;
441          p += rc;
442          if (PRINTHEX(c))
443            {
444            if (f != NULL) fprintf(f, "%c", c);
445            yield++;
446            }
447          else
448            {
449            int n = 4;
450            if (f != NULL) fprintf(f, "\\x{%02x}", c);
451            yield += (n <= 0x000000ff)? 2 :
452                     (n <= 0x00000fff)? 3 :
453                     (n <= 0x0000ffff)? 4 :
454                     (n <= 0x000fffff)? 5 : 6;
455          }          }
456          continue;
457        }        }
458      break;      }
459    #endif
460    
461      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
462    
463      default:    c = *p++;
464      fprintf(outfile, "    %s", OP_names[*code]);    if (PRINTHEX(c))
465      break;      {
466        if (f != NULL) fprintf(f, "%c", c);
467        yield++;
468        }
469      else
470        {
471        if (f != NULL) fprintf(f, "\\x%02x", c);
472        yield += 4;
473      }      }
   
   code++;  
   fprintf(outfile, "\n");  
474    }    }
475    
476    return yield;
477  }  }
478    
479    
480    
481  /* Character string printing function. */  /*************************************************
482    *              Callout function                  *
483    *************************************************/
484    
485    /* Called from PCRE as a result of the (?C) item. We print out where we are in
486    the match. Yield zero unless more callouts than the fail count, or the callout
487    data is not zero. */
488    
489  static void pchars(unsigned char *p, int length)  static int callout(pcre_callout_block *cb)
490  {  {
491  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
492  while (length-- > 0)  int i, pre_start, post_start, subject_length;
493    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
494      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
495      {
496      fprintf(f, "Callout %d: last capture = %d\n",
497        cb->callout_number, cb->capture_last);
498    
499      for (i = 0; i < cb->capture_top * 2; i += 2)
500        {
501        if (cb->offset_vector[i] < 0)
502          fprintf(f, "%2d: <unset>\n", i/2);
503        else
504          {
505          fprintf(f, "%2d: ", i/2);
506          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
507            cb->offset_vector[i+1] - cb->offset_vector[i], f);
508          fprintf(f, "\n");
509          }
510        }
511      }
512    
513    /* Re-print the subject in canonical form, the first time or if giving full
514    datails. On subsequent calls in the same match, we use pchars just to find the
515    printed lengths of the substrings. */
516    
517    if (f != NULL) fprintf(f, "--->");
518    
519    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
520    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
521      cb->current_position - cb->start_match, f);
522    
523    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
524    
525    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
526      cb->subject_length - cb->current_position, f);
527    
528    if (f != NULL) fprintf(f, "\n");
529    
530    /* Always print appropriate indicators, with callout number if not already
531    shown. For automatic callouts, show the pattern offset. */
532    
533    if (cb->callout_number == 255)
534      {
535      fprintf(outfile, "%+3d ", cb->pattern_position);
536      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
537      }
538    else
539      {
540      if (callout_extra) fprintf(outfile, "    ");
541        else fprintf(outfile, "%3d ", cb->callout_number);
542      }
543    
544    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
545    fprintf(outfile, "^");
546    
547    if (post_start > 0)
548      {
549      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
550      fprintf(outfile, "^");
551      }
552    
553    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
554      fprintf(outfile, " ");
555    
556    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
557      pbuffer + cb->pattern_position);
558    
559    fprintf(outfile, "\n");
560    first_callout = 0;
561    
562    if (cb->callout_data != NULL)
563      {
564      int callout_data = *((int *)(cb->callout_data));
565      if (callout_data != 0)
566        {
567        fprintf(outfile, "Callout data = %d\n", callout_data);
568        return callout_data;
569        }
570      }
571    
572    return (cb->callout_number != callout_fail_id)? 0 :
573           (++callout_count >= callout_fail_count)? 1 : 0;
574  }  }
575    
576    
577    /*************************************************
578    *            Local malloc functions              *
579    *************************************************/
580    
581  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
582  compiled re. */  compiled re. */
583    
584  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
585  {  {
586  if (log_store)  void *block = malloc(size);
587    fprintf(outfile, "Memory allocation (code space): %d\n",  gotten_store = size;
588      (int)((int)size - offsetof(real_pcre, code[0])));  if (show_malloc)
589  return malloc(size);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
590    return block;
591    }
592    
593    static void new_free(void *block)
594    {
595    if (show_malloc)
596      fprintf(outfile, "free             %p\n", block);
597    free(block);
598    }
599    
600    
601    /* For recursion malloc/free, to test stacking calls */
602    
603    static void *stack_malloc(size_t size)
604    {
605    void *block = malloc(size);
606    if (show_malloc)
607      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
608    return block;
609    }
610    
611    static void stack_free(void *block)
612    {
613    if (show_malloc)
614      fprintf(outfile, "stack_free       %p\n", block);
615    free(block);
616    }
617    
618    
619    /*************************************************
620    *          Call pcre_fullinfo()                  *
621    *************************************************/
622    
623    /* Get one piece of information from the pcre_fullinfo() function */
624    
625    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
626    {
627    int rc;
628    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
629      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
630    }
631    
632    
633    
/*************************************************
*         Byte flipping function                 *
*************************************************/

/* Reverse the order of the bytes in a 2-byte or 4-byte quantity. Only the
least significant four bytes of the value are ever looked at.

Arguments:
  value     the value whose bytes are to be reversed
  n         2 to swap the two least significant bytes; any other value
              causes the four least significant bytes to be reversed

Returns:    the value with its bytes in the opposite order
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;            /* Least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2 = (value >> 16) & 0xff;
unsigned long int b3 = (value >> 24) & 0xff;

if (n == 2)
  {
  return (b0 << 8) | b1;
  }

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
647    
648    
649    
650    
651    /*************************************************
652    *        Check match or recursion limit          *
653    *************************************************/
654    
655    static int
656    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
657      int start_offset, int options, int *use_offsets, int use_size_offsets,
658      int flag, unsigned long int *limit, int errnumber, const char *msg)
659    {
660    int count;
661    int min = 0;
662    int mid = 64;
663    int max = -1;
664    
665    extra->flags |= flag;
666    
667    for (;;)
668      {
669      *limit = mid;
670    
671      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
672        use_offsets, use_size_offsets);
673    
674      if (count == errnumber)
675        {
676        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
677        min = mid;
678        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
679        }
680    
681      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
682                             count == PCRE_ERROR_PARTIAL)
683        {
684        if (mid == min + 1)
685          {
686          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
687          break;
688          }
689        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
690        max = mid;
691        mid = (min + mid)/2;
692        }
693      else break;    /* Some other error */
694      }
695    
696    extra->flags &= ~flag;
697    return count;
698    }
699    
700    
701    
702    /*************************************************
703    *         Case-independent strncmp() function    *
704    *************************************************/
705    
706    /*
707    Arguments:
708      s         first string
709      t         second string
710      n         number of characters to compare
711    
712    Returns:    < 0, = 0, or > 0, according to the comparison
713    */
714    
715    static int
716    strncmpic(uschar *s, uschar *t, int n)
717    {
718    while (n--)
719      {
720      int c = tolower(*s++) - tolower(*t++);
721      if (c) return c;
722      }
723    return 0;
724    }
725    
726    
727    
728    /*************************************************
729    *         Check newline indicator                *
730    *************************************************/
731    
732    /* This is used both at compile and run-time to check for <xxx> escapes, where
733    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
734    no match.
735    
736    Arguments:
737      p           points after the leading '<'
738      f           file for error message
739    
740    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
741    */
742    
743    static int
744    check_newline(uschar *p, FILE *f)
745    {
746    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
747    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
748    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
749    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
750    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
751    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
752    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
753    fprintf(f, "Unknown newline type at: <%s\n", p);
754    return 0;
755  }  }
756    
757    
758    
759    /*************************************************
760    *             Usage function                     *
761    *************************************************/
762    
/* Write the command line summary to stdout. The readline, -dfa, and -p lines
depend on SUPPORT_LIBREADLINE, NODFA, and NOPOSIX respectively. The text is
split into separate calls at each conditional so that no preprocessor
directive appears inside a function call's argument list. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
795    
796    
797    
798    /*************************************************
799    *                Main Program                    *
800    *************************************************/
801    
802  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
803  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
804  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 300  int options = 0; Line 810  int options = 0;
810  int study_options = 0;  int study_options = 0;
811  int op = 1;  int op = 1;
812  int timeit = 0;  int timeit = 0;
813    int timeitm = 0;
814  int showinfo = 0;  int showinfo = 0;
815  int showstore = 0;  int showstore = 0;
816    int quiet = 0;
817    int size_offsets = 45;
818    int size_offsets_max;
819    int *offsets = NULL;
820    #if !defined NOPOSIX
821  int posix = 0;  int posix = 0;
822    #endif
823  int debug = 0;  int debug = 0;
824  int done = 0;  int done = 0;
825  unsigned char buffer[30000];  int all_use_dfa = 0;
826  unsigned char dbuffer[1024];  int yield = 0;
827    int stack_size;
828    
829    /* These vectors store, end-to-end, a list of captured substring names. Assume
830    that 1024 is plenty long enough for the few names we'll be testing. */
831    
832    uschar copynames[1024];
833    uschar getnames[1024];
834    
835    uschar *copynamesptr;
836    uschar *getnamesptr;
837    
838    /* Get buffers from malloc() so that Electric Fence will check their misuse
839    when I am debugging. They grow automatically when very long lines are read. */
840    
841  /* Static so that new_malloc can use it. */  buffer = (unsigned char *)malloc(buffer_size);
842    dbuffer = (unsigned char *)malloc(buffer_size);
843    pbuffer = (unsigned char *)malloc(buffer_size);
844    
845    /* The outfile variable is static so that new_malloc can use it. */
846    
847  outfile = stdout;  outfile = stdout;
848    
849    /* The following  _setmode() stuff is some Windows magic that tells its runtime
850    library to translate CRLF into a single LF character. At least, that's what
851    I've been told: never having used Windows I take this all on trust. Originally
852    it set 0x8000, but then I was advised that _O_BINARY was better. */
853    
854    #if defined(_WIN32) || defined(WIN32)
855    _setmode( _fileno( stdout ), _O_BINARY );
856    #endif
857    
858  /* Scan options */  /* Scan options */
859    
860  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
861    {    {
862      unsigned char *endptr;
863    
864    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
865      showstore = 1;      showstore = 1;
866    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
867      else if (strcmp(argv[op], "-b") == 0) debug = 1;
868    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
869    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
870    #if !defined NODFA
871      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
872    #endif
873      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
874          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
875            *endptr == 0))
876        {
877        op++;
878        argc--;
879        }
880      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
881        {
882        int both = argv[op][2] == 0;
883        int temp;
884        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
885                         *endptr == 0))
886          {
887          timeitm = temp;
888          op++;
889          argc--;
890          }
891        else timeitm = LOOPREPEAT;
892        if (both) timeit = timeitm;
893        }
894      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
895          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
896            *endptr == 0))
897        {
898    #if defined(_WIN32) || defined(WIN32)
899        printf("PCRE: -S not supported on this OS\n");
900        exit(1);
901    #else
902        int rc;
903        struct rlimit rlim;
904        getrlimit(RLIMIT_STACK, &rlim);
905        rlim.rlim_cur = stack_size * 1024 * 1024;
906        rc = setrlimit(RLIMIT_STACK, &rlim);
907        if (rc != 0)
908          {
909        printf("PCRE: setrlimit() failed with error %d\n", rc);
910        exit(1);
911          }
912        op++;
913        argc--;
914    #endif
915        }
916    #if !defined NOPOSIX
917    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
918    #endif
919      else if (strcmp(argv[op], "-C") == 0)
920        {
921        int rc;
922        printf("PCRE version %s\n", pcre_version());
923        printf("Compiled with\n");
924        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
925        printf("  %sUTF-8 support\n", rc? "" : "No ");
926        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
927        printf("  %sUnicode properties support\n", rc? "" : "No ");
928        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
929        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
930          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
931          (rc == -2)? "ANYCRLF" :
932          (rc == -1)? "ANY" : "???");
933        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
934        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
935                                         "all Unicode newlines");
936        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
937        printf("  Internal link size = %d\n", rc);
938        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
939        printf("  POSIX malloc threshold = %d\n", rc);
940        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
941        printf("  Default match limit = %d\n", rc);
942        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
943        printf("  Default recursion depth limit = %d\n", rc);
944        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
945        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
946        goto EXIT;
947        }
948      else if (strcmp(argv[op], "-help") == 0 ||
949               strcmp(argv[op], "--help") == 0)
950        {
951        usage();
952        goto EXIT;
953        }
954    else    else
955      {      {
956      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
957      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
958      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
959             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
960      }      }
961    op++;    op++;
962    argc--;    argc--;
963    }    }
964    
965    /* Get the store for the offsets vector, and remember what it was */
966    
967    size_offsets_max = size_offsets;
968    offsets = (int *)malloc(size_offsets_max * sizeof(int));
969    if (offsets == NULL)
970      {
971      printf("** Failed to get %d bytes of memory for offsets vector\n",
972        (int)(size_offsets_max * sizeof(int)));
973      yield = 1;
974      goto EXIT;
975      }
976    
977  /* Sort out the input and output files */  /* Sort out the input and output files */
978    
979  if (argc > 1)  if (argc > 1)
980    {    {
981    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
982    if (infile == NULL)    if (infile == NULL)
983      {      {
984      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
985      return 1;      yield = 1;
986        goto EXIT;
987      }      }
988    }    }
989    
990  if (argc > 2)  if (argc > 2)
991    {    {
992    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
993    if (outfile == NULL)    if (outfile == NULL)
994      {      {
995      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
996      return 1;      yield = 1;
997        goto EXIT;
998      }      }
999    }    }
1000    
1001  /* Set alternative malloc function */  /* Set alternative malloc function */
1002    
1003  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1004    pcre_free = new_free;
1005    pcre_stack_malloc = stack_malloc;
1006    pcre_stack_free = stack_free;
1007    
1008  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1009    
1010  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1011    
1012  /* Main loop */  /* Main loop */
1013    
# Line 376  while (!done) Line 1018  while (!done)
1018    
1019  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
1020    regex_t preg;    regex_t preg;
1021      int do_posix = 0;
1022  #endif  #endif
1023    
1024    const char *error;    const char *error;
1025    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1026    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
1027      const unsigned char *tables = NULL;
1028      unsigned long int true_size, true_study_size = 0;
1029      size_t size, regex_gotten_store;
1030    int do_study = 0;    int do_study = 0;
1031    int do_debug = debug;    int do_debug = debug;
1032    int do_G = 0;    int do_G = 0;
1033    int do_g = 0;    int do_g = 0;
1034    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1035    int do_showrest = 0;    int do_showrest = 0;
1036    int do_posix = 0;    int do_flip = 0;
1037    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
1038    
1039      use_utf8 = 0;
1040      debug_lengths = 1;
1041    
1042    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1043    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1044      fflush(outfile);
1045    
1046    p = buffer;    p = buffer;
1047    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1048    if (*p == 0) continue;    if (*p == 0) continue;
1049    
1050    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1051    complete, read more. */  
1052      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1053        {
1054        unsigned long int magic, get_options;
1055        uschar sbuf[8];
1056        FILE *f;
1057    
1058        p++;
1059        pp = p + (int)strlen((char *)p);
1060        while (isspace(pp[-1])) pp--;
1061        *pp = 0;
1062    
1063        f = fopen((char *)p, "rb");
1064        if (f == NULL)
1065          {
1066          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1067          continue;
1068          }
1069    
1070        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1071    
1072        true_size =
1073          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1074        true_study_size =
1075          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1076    
1077        re = (real_pcre *)new_malloc(true_size);
1078        regex_gotten_store = gotten_store;
1079    
1080        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1081    
1082        magic = ((real_pcre *)re)->magic_number;
1083        if (magic != MAGIC_NUMBER)
1084          {
1085          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1086            {
1087            do_flip = 1;
1088            }
1089          else
1090            {
1091            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1092            fclose(f);
1093            continue;
1094            }
1095          }
1096    
1097        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1098          do_flip? " (byte-inverted)" : "", p);
1099    
1100        /* Need to know if UTF-8 for printing data strings */
1101    
1102        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1103        use_utf8 = (get_options & PCRE_UTF8) != 0;
1104    
1105        /* Now see if there is any following study data */
1106    
1107        if (true_study_size != 0)
1108          {
1109          pcre_study_data *psd;
1110    
1111          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1112          extra->flags = PCRE_EXTRA_STUDY_DATA;
1113    
1114          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1115          extra->study_data = psd;
1116    
1117          if (fread(psd, 1, true_study_size, f) != true_study_size)
1118            {
1119            FAIL_READ:
1120            fprintf(outfile, "Failed to read data from %s\n", p);
1121            if (extra != NULL) new_free(extra);
1122            if (re != NULL) new_free(re);
1123            fclose(f);
1124            continue;
1125            }
1126          fprintf(outfile, "Study data loaded from %s\n", p);
1127          do_study = 1;     /* To get the data output if requested */
1128          }
1129        else fprintf(outfile, "No study data\n");
1130    
1131        fclose(f);
1132        goto SHOW_INFO;
1133        }
1134    
1135      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1136      the pattern; if it isn't complete, read more. */
1137    
1138    delimiter = *p++;    delimiter = *p++;
1139    
1140    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1141      {      {
1142      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1143      goto SKIP_DATA;      goto SKIP_DATA;
1144      }      }
1145    
1146    pp = p;    pp = p;
1147      poffset = p - buffer;
1148    
1149    for(;;)    for(;;)
1150      {      {
# Line 420  while (!done) Line 1155  while (!done)
1155        pp++;        pp++;
1156        }        }
1157      if (*pp != 0) break;      if (*pp != 0) break;
1158        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1159        {        {
1160        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1161        done = 1;        done = 1;
# Line 438  while (!done) Line 1164  while (!done)
1164      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1165      }      }
1166    
1167      /* The buffer may have moved while being extended; reset the start of data
1168      pointer to the correct relative point in the buffer. */
1169    
1170      p = buffer + poffset;
1171    
1172    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1173    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1174    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1175    
1176    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1177    
1178    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1179      for callouts. */
1180    
1181    *pp++ = 0;    *pp++ = 0;
1182      strcpy((char *)pbuffer, (char *)p);
1183    
1184    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1185    
# Line 458  while (!done) Line 1191  while (!done)
1191      {      {
1192      switch (*pp++)      switch (*pp++)
1193        {        {
1194          case 'f': options |= PCRE_FIRSTLINE; break;
1195        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1196        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1197        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 466  while (!done) Line 1200  while (!done)
1200    
1201        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1202        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1203          case 'B': do_debug = 1; break;
1204          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1205        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1206        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1207          case 'F': do_flip = 1; break;
1208        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1209        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1210          case 'J': options |= PCRE_DUPNAMES; break;
1211        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1212          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1213    
1214  #if !defined NOPOSIX  #if !defined NOPOSIX
1215        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 479  while (!done) Line 1218  while (!done)
1218        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1219        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1220        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1221          case 'Z': debug_lengths = 0; break;
1222          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1223          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1224    
1225        case 'L':        case 'L':
1226        ppp = pp;        ppp = pp;
1227        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1228          /* The '0' test is just in case this is an unterminated line. */
1229          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1230        *ppp = 0;        *ppp = 0;
1231        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1232          {          {
1233          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1234          goto SKIP_DATA;          goto SKIP_DATA;
1235          }          }
1236          locale_set = 1;
1237        tables = pcre_maketables();        tables = pcre_maketables();
1238        pp = ppp;        pp = ppp;
1239        break;        break;
1240    
1241        case '\n': case ' ': break;        case '>':
1242          to_file = pp;
1243          while (*pp != 0) pp++;
1244          while (isspace(pp[-1])) pp--;
1245          *pp = 0;
1246          break;
1247    
1248          case '<':
1249            {
1250            int x = check_newline(pp, outfile);
1251            if (x == 0) goto SKIP_DATA;
1252            options |= x;
1253            while (*pp++ != '>');
1254            }
1255          break;
1256    
1257          case '\r':                      /* So that it works in Windows */
1258          case '\n':
1259          case ' ':
1260          break;
1261    
1262        default:        default:
1263        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1264        goto SKIP_DATA;        goto SKIP_DATA;
# Line 509  while (!done) Line 1274  while (!done)
1274      {      {
1275      int rc;      int rc;
1276      int cflags = 0;      int cflags = 0;
1277    
1278      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1279      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1280        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1281        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1282        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1283    
1284      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1285    
1286      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 518  while (!done) Line 1288  while (!done)
1288    
1289      if (rc != 0)      if (rc != 0)
1290        {        {
1291        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1292        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1293        goto SKIP_DATA;        goto SKIP_DATA;
1294        }        }
# Line 530  while (!done) Line 1300  while (!done)
1300  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1301    
1302      {      {
1303      if (timeit)      if (timeit > 0)
1304        {        {
1305        register int i;        register int i;
1306        clock_t time_taken;        clock_t time_taken;
1307        clock_t start_time = clock();        clock_t start_time = clock();
1308        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1309          {          {
1310          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1311          if (re != NULL) free(re);          if (re != NULL) free(re);
1312          }          }
1313        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1314        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1315          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1316          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1317        }        }
1318    
1319      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 559  while (!done) Line 1329  while (!done)
1329          {          {
1330          for (;;)          for (;;)
1331            {            {
1332            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1333              {              {
1334              done = 1;              done = 1;
1335              goto CONTINUE;              goto CONTINUE;
# Line 570  while (!done) Line 1340  while (!done)
1340            }            }
1341          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1342          }          }
1343        goto CONTINUE;        goto CONTINUE;
1344          }
1345    
1346        /* Compilation succeeded; print data if required. There are now two
1347        info-returning functions. The old one has a limited interface and
1348        returns only limited data. Check that it agrees with the newer one. */
1349    
1350        if (log_store)
1351          fprintf(outfile, "Memory allocation (code space): %d\n",
1352            (int)(gotten_store -
1353                  sizeof(real_pcre) -
1354                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1355    
1356        /* Extract the size for possible writing before possibly flipping it,
1357        and remember the store that was got. */
1358    
1359        true_size = ((real_pcre *)re)->size;
1360        regex_gotten_store = gotten_store;
1361    
1362        /* If /S was present, study the regexp to generate additional info to
1363        help with the matching. */
1364    
1365        if (do_study)
1366          {
1367          if (timeit > 0)
1368            {
1369            register int i;
1370            clock_t time_taken;
1371            clock_t start_time = clock();
1372            for (i = 0; i < timeit; i++)
1373              extra = pcre_study(re, study_options, &error);
1374            time_taken = clock() - start_time;
1375            if (extra != NULL) free(extra);
1376            fprintf(outfile, "  Study time %.4f milliseconds\n",
1377              (((double)time_taken * 1000.0) / (double)timeit) /
1378                (double)CLOCKS_PER_SEC);
1379            }
1380          extra = pcre_study(re, study_options, &error);
1381          if (error != NULL)
1382            fprintf(outfile, "Failed to study: %s\n", error);
1383          else if (extra != NULL)
1384            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1385          }
1386    
1387        /* If the 'F' option was present, we flip the bytes of all the integer
1388        fields in the regex data block and the study block. This is to make it
1389        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1390        compiled on a different architecture. */
1391    
1392        if (do_flip)
1393          {
1394          real_pcre *rre = (real_pcre *)re;
1395          rre->magic_number =
1396            byteflip(rre->magic_number, sizeof(rre->magic_number));
1397          rre->size = byteflip(rre->size, sizeof(rre->size));
1398          rre->options = byteflip(rre->options, sizeof(rre->options));
1399          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1400          rre->top_bracket =
1401            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1402          rre->top_backref =
1403            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1404          rre->first_byte =
1405            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1406          rre->req_byte =
1407            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1408          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1409            sizeof(rre->name_table_offset));
1410          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1411            sizeof(rre->name_entry_size));
1412          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1413            sizeof(rre->name_count));
1414    
1415          if (extra != NULL)
1416            {
1417            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1418            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1419            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1420            }
1421        }        }
1422    
1423      /* Compilation succeeded; print data if required */      /* Extract information from the compiled data if required */
1424    
1425      if (do_showinfo)      SHOW_INFO:
1426    
1427        if (do_debug)
1428        {        {
1429        int first_char, count;        fprintf(outfile, "------------------------------------------------------------------\n");
1430          pcre_printint(re, outfile, debug_lengths);
1431          }
1432    
1433        if (do_debug) print_internals(re);      if (do_showinfo)
1434          {
1435          unsigned long int get_options, all_options;
1436    #if !defined NOINFOCHECK
1437          int old_first_char, old_options, old_count;
1438    #endif
1439          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1440            hascrorlf;
1441          int nameentrysize, namecount;
1442          const uschar *nametable;
1443    
1444          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1445          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1446          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1447          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1448          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1449          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1450          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1451          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1452          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1453          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1454          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1455          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1456    
1457        count = pcre_info(re, &options, &first_char);  #if !defined NOINFOCHECK
1458          old_count = pcre_info(re, &old_options, &old_first_char);
1459        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1460          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
1461        else        else
1462          {          {
1463          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
1464          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1465            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
1466              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
1467              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
1468              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1469              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
1470              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
1471              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != (int)get_options) fprintf(outfile,
1472              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1473              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              get_options, old_options);
1474            }
1475          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)  #endif
           fprintf(outfile, "Case state changes\n");  
1476    
1477          if (first_char == -1)        if (size != regex_gotten_store) fprintf(outfile,
1478            {          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1479            fprintf(outfile, "First char at start or follows \\n\n");          (int)size, (int)regex_gotten_store);
1480            }  
1481          else if (first_char < 0)        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1482          if (backrefmax > 0)
1483            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1484    
1485          if (namecount > 0)
1486            {
1487            fprintf(outfile, "Named capturing subpatterns:\n");
1488            while (namecount-- > 0)
1489            {            {
1490            fprintf(outfile, "No first char\n");            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1491                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1492                GET2(nametable, 0));
1493              nametable += nameentrysize;
1494            }            }
1495            }
1496    
1497          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1498          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1499    
1500          all_options = ((real_pcre *)re)->options;
1501          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1502    
1503          if (get_options == 0) fprintf(outfile, "No options\n");
1504            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1505              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1506              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1507              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1508              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1509              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1510              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1511              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1512              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1513              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1514              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1515              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1516              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1517              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1518              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1519              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1520    
1521          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1522    
1523          switch (get_options & PCRE_NEWLINE_BITS)
1524            {
1525            case PCRE_NEWLINE_CR:
1526            fprintf(outfile, "Forced newline sequence: CR\n");
1527            break;
1528    
1529            case PCRE_NEWLINE_LF:
1530            fprintf(outfile, "Forced newline sequence: LF\n");
1531            break;
1532    
1533            case PCRE_NEWLINE_CRLF:
1534            fprintf(outfile, "Forced newline sequence: CRLF\n");
1535            break;
1536    
1537            case PCRE_NEWLINE_ANYCRLF:
1538            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1539            break;
1540    
1541            case PCRE_NEWLINE_ANY:
1542            fprintf(outfile, "Forced newline sequence: ANY\n");
1543            break;
1544    
1545            default:
1546            break;
1547            }
1548    
1549          if (first_char == -1)
1550            {
1551            fprintf(outfile, "First char at start or follows newline\n");
1552            }
1553          else if (first_char < 0)
1554            {
1555            fprintf(outfile, "No first char\n");
1556            }
1557          else
1558            {
1559            int ch = first_char & 255;
1560            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1561              "" : " (caseless)";
1562            if (PRINTHEX(ch))
1563              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1564          else          else
1565            {            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1566            if (isprint(first_char))          }
1567              fprintf(outfile, "First char = \'%c\'\n", first_char);  
1568            else        if (need_char < 0)
1569              fprintf(outfile, "First char = %d\n", first_char);          {
1570            }          fprintf(outfile, "No need char\n");
1571            }
1572          else
1573            {
1574            int ch = need_char & 255;
1575            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1576              "" : " (caseless)";
1577            if (PRINTHEX(ch))
1578              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1579            else
1580              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1581            }
1582    
1583          /* Don't output study size; at present it is in any case a fixed
1584          value, but it varies, depending on the computer architecture, and
1585          so messes up the test suite. (And with the /F option, it might be
1586          flipped.) */
1587    
1588          if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)        if (do_study)
1589            {
1590            if (extra == NULL)
1591              fprintf(outfile, "Study returned NULL\n");
1592            else
1593            {            {
1594            int req_char = ((real_pcre *)re)->req_char;            uschar *start_bits = NULL;
1595            if (isprint(req_char))            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1596              fprintf(outfile, "Req char = \'%c\'\n", req_char);  
1597              if (start_bits == NULL)
1598                fprintf(outfile, "No starting byte set\n");
1599            else            else
1600              fprintf(outfile, "Req char = %d\n", req_char);              {
1601                int i;
1602                int c = 24;
1603                fprintf(outfile, "Starting byte set: ");
1604                for (i = 0; i < 256; i++)
1605                  {
1606                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1607                    {
1608                    if (c > 75)
1609                      {
1610                      fprintf(outfile, "\n  ");
1611                      c = 2;
1612                      }
1613                    if (PRINTHEX(i) && i != ' ')
1614                      {
1615                      fprintf(outfile, "%c ", i);
1616                      c += 2;
1617                      }
1618                    else
1619                      {
1620                      fprintf(outfile, "\\x%02x ", i);
1621                      c += 5;
1622                      }
1623                    }
1624                  }
1625                fprintf(outfile, "\n");
1626                }
1627            }            }
         else fprintf(outfile, "No req char\n");  
1628          }          }
1629        }        }
1630    
1631      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1632      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1633        the study length, in big-endian order. */
1634    
1635      if (do_study)      if (to_file != NULL)
1636        {        {
1637        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1638          if (f == NULL)
1639          {          {
1640          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
1641          }          }
1642          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (do_showinfo)  
1643          {          {
1644          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1645          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (uschar)((true_size >> 24) & 255);
1646            fprintf(outfile, "No starting character set\n");          sbuf[1] = (uschar)((true_size >> 16) & 255);
1647            sbuf[2] = (uschar)((true_size >>  8) & 255);
1648            sbuf[3] = (uschar)((true_size) & 255);
1649    
1650            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1651            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1652            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1653            sbuf[7] = (uschar)((true_study_size) & 255);
1654    
1655            if (fwrite(sbuf, 1, 8, f) < 8 ||
1656                fwrite(re, 1, true_size, f) < true_size)
1657              {
1658              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1659              }
1660          else          else
1661            {            {
1662            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1663            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1664              {              {
1665              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1666                    true_study_size)
1667                {                {
1668                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1669                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1670                }                }
1671                else fprintf(outfile, "Study data written to %s\n", to_file);
1672    
1673              }              }
           fprintf(outfile, "\n");  
1674            }            }
1675            fclose(f);
1676          }          }
1677    
1678          new_free(re);
1679          if (extra != NULL) new_free(extra);
1680          if (tables != NULL) new_free((void *)tables);
1681          continue;  /* With next regex */
1682        }        }
1683      }      }        /* End of non-POSIX compile */
1684    
1685    /* Read data lines and test them */    /* Read data lines and test them */
1686    
1687    for (;;)    for (;;)
1688      {      {
1689      unsigned char *q;      uschar *q;
1690      unsigned char *bptr = dbuffer;      uschar *bptr;
1691        int *use_offsets = offsets;
1692        int use_size_offsets = size_offsets;
1693        int callout_data = 0;
1694        int callout_data_set = 0;
1695      int count, c;      int count, c;
1696      int copystrings = 0;      int copystrings = 0;
1697        int find_match_limit = 0;
1698      int getstrings = 0;      int getstrings = 0;
1699      int getlist = 0;      int getlist = 0;
1700      int gmatched = 0;      int gmatched = 0;
1701      int start_offset = 0;      int start_offset = 0;
1702      int g_notempty = 0;      int g_notempty = 0;
1703      int offsets[45];      int use_dfa = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
1704    
1705      options = 0;      options = 0;
1706    
1707      if (infile == stdin) printf("data> ");      *copynames = 0;
1708      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1709    
1710        copynamesptr = copynames;
1711        getnamesptr = getnames;
1712    
1713        pcre_callout = callout;
1714        first_callout = 1;
1715        callout_extra = 0;
1716        callout_count = 0;
1717        callout_fail_count = 999999;
1718        callout_fail_id = -1;
1719        show_malloc = 0;
1720    
1721        if (extra != NULL) extra->flags &=
1722          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1723    
1724        len = 0;
1725        for (;;)
1726        {        {
1727        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1728        goto CONTINUE;          {
1729            if (len > 0) break;
1730            done = 1;
1731            goto CONTINUE;
1732            }
1733          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1734          len = (int)strlen((char *)buffer);
1735          if (buffer[len-1] == '\n') break;
1736        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1737    
     len = (int)strlen((char *)buffer);  
1738      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1739      buffer[len] = 0;      buffer[len] = 0;
1740      if (len == 0) break;      if (len == 0) break;
# Line 728  while (!done) Line 1742  while (!done)
1742      p = buffer;      p = buffer;
1743      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1744    
1745      q = dbuffer;      bptr = q = dbuffer;
1746      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1747        {        {
1748        int i = 0;        int i = 0;
1749        int n = 0;        int n = 0;
1750    
1751        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1752          {          {
1753          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 749  while (!done) Line 1764  while (!done)
1764          c -= '0';          c -= '0';
1765          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1766            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1767    
1768    #if !defined NOUTF8
1769            if (use_utf8 && c > 255)
1770              {
1771              unsigned char buff8[8];
1772              int ii, utn;
1773              utn = ord2utf8(c, buff8);
1774              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1775              c = buff8[ii];   /* Last byte */
1776              }
1777    #endif
1778          break;          break;
1779    
1780          case 'x':          case 'x':
1781    
1782            /* Handle \x{..} specially - new Perl thing for utf8 */
1783    
1784    #if !defined NOUTF8
1785            if (*p == '{')
1786              {
1787              unsigned char *pt = p;
1788              c = 0;
1789              while (isxdigit(*(++pt)))
1790                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1791              if (*pt == '}')
1792                {
1793                unsigned char buff8[8];
1794                int ii, utn;
1795                utn = ord2utf8(c, buff8);
1796                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1797                c = buff8[ii];   /* Last byte */
1798                p = pt + 1;
1799                break;
1800                }
1801              /* Not correct form; fall through */
1802              }
1803    #endif
1804    
1805            /* Ordinary \x */
1806    
1807          c = 0;          c = 0;
1808          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1809            {            {
# Line 760  while (!done) Line 1812  while (!done)
1812            }            }
1813          break;          break;
1814    
1815          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1816          p--;          p--;
1817          continue;          continue;
1818    
1819            case '>':
1820            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1821            continue;
1822    
1823          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1824          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1825          continue;          continue;
# Line 773  while (!done) Line 1829  while (!done)
1829          continue;          continue;
1830    
1831          case 'C':          case 'C':
1832          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1833          copystrings |= 1 << n;            {
1834              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1835              copystrings |= 1 << n;
1836              }
1837            else if (isalnum(*p))
1838              {
1839              uschar *npp = copynamesptr;
1840              while (isalnum(*p)) *npp++ = *p++;
1841              *npp++ = 0;
1842              *npp = 0;
1843              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1844              if (n < 0)
1845                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1846              copynamesptr = npp;
1847              }
1848            else if (*p == '+')
1849              {
1850              callout_extra = 1;
1851              p++;
1852              }
1853            else if (*p == '-')
1854              {
1855              pcre_callout = NULL;
1856              p++;
1857              }
1858            else if (*p == '!')
1859              {
1860              callout_fail_id = 0;
1861              p++;
1862              while(isdigit(*p))
1863                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1864              callout_fail_count = 0;
1865              if (*p == '!')
1866                {
1867                p++;
1868                while(isdigit(*p))
1869                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1870                }
1871              }
1872            else if (*p == '*')
1873              {
1874              int sign = 1;
1875              callout_data = 0;
1876              if (*(++p) == '-') { sign = -1; p++; }
1877              while(isdigit(*p))
1878                callout_data = callout_data * 10 + *p++ - '0';
1879              callout_data *= sign;
1880              callout_data_set = 1;
1881              }
1882            continue;
1883    
1884    #if !defined NODFA
1885            case 'D':
1886    #if !defined NOPOSIX
1887            if (posix || do_posix)
1888              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1889            else
1890    #endif
1891              use_dfa = 1;
1892            continue;
1893    
1894            case 'F':
1895            options |= PCRE_DFA_SHORTEST;
1896          continue;          continue;
1897    #endif
1898    
1899          case 'G':          case 'G':
1900          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1901          getstrings |= 1 << n;            {
1902              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1903              getstrings |= 1 << n;
1904              }
1905            else if (isalnum(*p))
1906              {
1907              uschar *npp = getnamesptr;
1908              while (isalnum(*p)) *npp++ = *p++;
1909              *npp++ = 0;
1910              *npp = 0;
1911              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1912              if (n < 0)
1913                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1914              getnamesptr = npp;
1915              }
1916          continue;          continue;
1917    
1918          case 'L':          case 'L':
1919          getlist = 1;          getlist = 1;
1920          continue;          continue;
1921    
1922            case 'M':
1923            find_match_limit = 1;
1924            continue;
1925    
1926          case 'N':          case 'N':
1927          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1928          continue;          continue;
1929    
1930          case 'O':          case 'O':
1931          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1932          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1933              {
1934              size_offsets_max = n;
1935              free(offsets);
1936              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1937              if (offsets == NULL)
1938                {
1939                printf("** Failed to get %d bytes of memory for offsets vector\n",
1940                  (int)(size_offsets_max * sizeof(int)));
1941                yield = 1;
1942                goto EXIT;
1943                }
1944              }
1945            use_size_offsets = n;
1946            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1947            continue;
1948    
1949            case 'P':
1950            options |= PCRE_PARTIAL;
1951            continue;
1952    
1953            case 'Q':
1954            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1955            if (extra == NULL)
1956              {
1957              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1958              extra->flags = 0;
1959              }
1960            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1961            extra->match_limit_recursion = n;
1962            continue;
1963    
1964            case 'q':
1965            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1966            if (extra == NULL)
1967              {
1968              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1969              extra->flags = 0;
1970              }
1971            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1972            extra->match_limit = n;
1973            continue;
1974    
1975    #if !defined NODFA
1976            case 'R':
1977            options |= PCRE_DFA_RESTART;
1978            continue;
1979    #endif
1980    
1981            case 'S':
1982            show_malloc = 1;
1983          continue;          continue;
1984    
1985          case 'Z':          case 'Z':
1986          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1987          continue;          continue;
1988    
1989            case '?':
1990            options |= PCRE_NO_UTF8_CHECK;
1991            continue;
1992    
1993            case '<':
1994              {
1995              int x = check_newline(p, outfile);
1996              if (x == 0) goto NEXT_DATA;
1997              options |= x;
1998              while (*p++ != '>');
1999              }
2000            continue;
2001          }          }
2002        *q++ = c;        *q++ = c;
2003        }        }
2004      *q = 0;      *q = 0;
2005      len = q - dbuffer;      len = q - dbuffer;
2006    
2007        if ((all_use_dfa || use_dfa) && find_match_limit)
2008          {
2009          printf("**Match limit not relevant for DFA matching: ignored\n");
2010          find_match_limit = 0;
2011          }
2012    
2013      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2014      support timing. */      support timing or playing with the match limit or callout data. */
2015    
2016  #if !defined NOPOSIX  #if !defined NOPOSIX
2017      if (posix || do_posix)      if (posix || do_posix)
2018        {        {
2019        int rc;        int rc;
2020        int eflags = 0;        int eflags = 0;
2021        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
2022          if (use_size_offsets > 0)
2023            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2024        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2025        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2026    
2027        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2028    
2029        if (rc != 0)        if (rc != 0)
2030          {          {
2031          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2032          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2033          }          }
2034          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2035                  != 0)
2036            {
2037            fprintf(outfile, "Matched with REG_NOSUB\n");
2038            }
2039        else        else
2040          {          {
2041          size_t i;          size_t i;
2042          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
2043            {            {
2044            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
2045              {              {
2046              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
2047              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
2048                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2049              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2050              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
2051                {                {
2052                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
2053                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2054                    outfile);
2055                fprintf(outfile, "\n");                fprintf(outfile, "\n");
2056                }                }
2057              }              }
2058            }            }
2059          }          }
2060          free(pmatch);
2061        }        }
2062    
2063      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 852  while (!done) Line 2067  while (!done)
2067    
2068      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2069        {        {
2070        if (timeit)        if (timeitm > 0)
2071          {          {
2072          register int i;          register int i;
2073          clock_t time_taken;          clock_t time_taken;
2074          clock_t start_time = clock();          clock_t start_time = clock();
2075          for (i = 0; i < LOOPREPEAT; i++)  
2076    #if !defined NODFA
2077            if (all_use_dfa || use_dfa)
2078              {
2079              int workspace[1000];
2080              for (i = 0; i < timeitm; i++)
2081                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2082                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2083                  sizeof(workspace)/sizeof(int));
2084              }
2085            else
2086    #endif
2087    
2088            for (i = 0; i < timeitm; i++)
2089            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2090              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2091    
2092          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2093          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2094            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2095            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2096          }          }
2097    
2098        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
2099          start_offset, options | g_notempty, offsets, size_offsets);        varying limits in order to find the minimum value for the match limit and
2100          for the recursion limit. */
2101    
2102        if (count == 0)        if (find_match_limit)
2103          {          {
2104          fprintf(outfile, "Matched, but too many substrings\n");          if (extra == NULL)
2105          count = size_offsets/3;            {
2106              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2107              extra->flags = 0;
2108              }
2109    
2110            (void)check_match_limit(re, extra, bptr, len, start_offset,
2111              options|g_notempty, use_offsets, use_size_offsets,
2112              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2113              PCRE_ERROR_MATCHLIMIT, "match()");
2114    
2115            count = check_match_limit(re, extra, bptr, len, start_offset,
2116              options|g_notempty, use_offsets, use_size_offsets,
2117              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2118              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2119            }
2120    
2121          /* If callout_data is set, use the interface with additional data */
2122    
2123          else if (callout_data_set)
2124            {
2125            if (extra == NULL)
2126              {
2127              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2128              extra->flags = 0;
2129              }
2130            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2131            extra->callout_data = &callout_data;
2132            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2133              options | g_notempty, use_offsets, use_size_offsets);
2134            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2135            }
2136    
2137          /* The normal case is just to do the match once, with the default
2138          value of match_limit. */
2139    
2140    #if !defined NODFA
2141          else if (all_use_dfa || use_dfa)
2142            {
2143            int workspace[1000];
2144            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2145              options | g_notempty, use_offsets, use_size_offsets, workspace,
2146              sizeof(workspace)/sizeof(int));
2147            if (count == 0)
2148              {
2149              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2150              count = use_size_offsets/2;
2151              }
2152            }
2153    #endif
2154    
2155          else
2156            {
2157            count = pcre_exec(re, extra, (char *)bptr, len,
2158              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2159            if (count == 0)
2160              {
2161              fprintf(outfile, "Matched, but too many substrings\n");
2162              count = use_size_offsets/3;
2163              }
2164          }          }
2165    
2166        /* Matched */        /* Matched */
2167    
2168        if (count >= 0)        if (count >= 0)
2169          {          {
2170          int i;          int i, maxcount;
2171    
2172    #if !defined NODFA
2173            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2174    #endif
2175              maxcount = use_size_offsets/3;
2176    
2177            /* This is a check against a lunatic return value. */
2178    
2179            if (count > maxcount)
2180              {
2181              fprintf(outfile,
2182                "** PCRE error: returned count %d is too big for offset size %d\n",
2183                count, use_size_offsets);
2184              count = use_size_offsets/3;
2185              if (do_g || do_G)
2186                {
2187                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2188                do_g = do_G = FALSE;        /* Break g/G loop */
2189                }
2190              }
2191    
2192          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2193            {            {
2194            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2195              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2196            else            else
2197              {              {
2198              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2199              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2200                  use_offsets[i+1] - use_offsets[i], outfile);
2201              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2202              if (i == 0)              if (i == 0)
2203                {                {
2204                if (do_showrest)                if (do_showrest)
2205                  {                  {
2206                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
2207                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2208                      outfile);
2209                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
2210                  }                  }
2211                }                }
# Line 905  while (!done) Line 2216  while (!done)
2216            {            {
2217            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2218              {              {
2219              char copybuffer[16];              char copybuffer[256];
2220              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2221                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2222              if (rc < 0)              if (rc < 0)
2223                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 915  while (!done) Line 2226  while (!done)
2226              }              }
2227            }            }
2228    
2229            for (copynamesptr = copynames;
2230                 *copynamesptr != 0;
2231                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2232              {
2233              char copybuffer[256];
2234              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2235                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2236              if (rc < 0)
2237                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2238              else
2239                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2240              }
2241    
2242          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2243            {            {
2244            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2245              {              {
2246              const char *substring;              const char *substring;
2247              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2248                i, &substring);                i, &substring);
2249              if (rc < 0)              if (rc < 0)
2250                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2251              else              else
2252                {                {
2253                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2254                free((void *)substring);                pcre_free_substring(substring);
2255                }                }
2256              }              }
2257            }            }
2258    
2259            for (getnamesptr = getnames;
2260                 *getnamesptr != 0;
2261                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2262              {
2263              const char *substring;
2264              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2265                count, (char *)getnamesptr, &substring);
2266              if (rc < 0)
2267                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2268              else
2269                {
2270                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2271                pcre_free_substring(substring);
2272                }
2273              }
2274    
2275          if (getlist)          if (getlist)
2276            {            {
2277            const char **stringlist;            const char **stringlist;
2278            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2279              &stringlist);              &stringlist);
2280            if (rc < 0)            if (rc < 0)
2281              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 945  while (!done) Line 2285  while (!done)
2285                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2286              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2287                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
2288              free((void *)stringlist);              /* free((void *)stringlist); */
2289                pcre_free_substring_list(stringlist);
2290              }              }
2291            }            }
2292          }          }
2293    
2294          /* There was a partial match */
2295    
2296          else if (count == PCRE_ERROR_PARTIAL)
2297            {
2298            fprintf(outfile, "Partial match");
2299    #if !defined NODFA
2300            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2301              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2302                bptr + use_offsets[0]);
2303    #endif
2304            fprintf(outfile, "\n");
2305            break;  /* Out of the /g loop */
2306            }
2307    
2308        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2309        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2310        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2311        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2312        was checked before setting PCRE_NOTEMPTY. */  
2313          Complication arises in the case when the newline option is "any" or
2314          "anycrlf". If the previous match was at the end of a line terminated by
2315          CRLF, an advance of one character just passes the \r, whereas we should
2316          prefer the longer newline sequence, as does the code in pcre_exec().
2317          Fudge the offset value to achieve this.
2318    
2319          Otherwise, in the case of UTF-8 matching, the advance must be one
2320          character, not one byte. */
2321    
2322        else        else
2323          {          {
2324          if (g_notempty != 0)          if (g_notempty != 0)
2325            {            {
2326            offsets[0] = start_offset;            int onechar = 1;
2327            offsets[1] = start_offset + 1;            unsigned int obits = ((real_pcre *)re)->options;
2328              use_offsets[0] = start_offset;
2329              if ((obits & PCRE_NEWLINE_BITS) == 0)
2330                {
2331                int d;
2332                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2333                obits = (d == '\r')? PCRE_NEWLINE_CR :
2334                        (d == '\n')? PCRE_NEWLINE_LF :
2335                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2336                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2337                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2338                }
2339              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2340                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2341                  &&
2342                  start_offset < len - 1 &&
2343                  bptr[start_offset] == '\r' &&
2344                  bptr[start_offset+1] == '\n')
2345                onechar++;
2346              else if (use_utf8)
2347                {
2348                while (start_offset + onechar < len)
2349                  {
2350                  int tb = bptr[start_offset+onechar];
2351                  if (tb <= 127) break;
2352                  tb &= 0xc0;
2353                  if (tb != 0 && tb != 0xc0) onechar++;
2354                  }
2355                }
2356              use_offsets[1] = start_offset + onechar;
2357            }            }
2358          else          else
2359            {            {
2360            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2361              {              {
2362              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2363              }              }
2364              else fprintf(outfile, "Error %d\n", count);
2365            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2366            }            }
2367          }          }
# Line 981  while (!done) Line 2373  while (!done)
2373        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
2374        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic
2375        what Perl's /g option does. This turns out to be rather cunning. First        what Perl's /g option does. This turns out to be rather cunning. First
2376        we set PCRE_NOTEMPTY and try the match again at the same point. If this        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2377        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
2378          character. */
2379    
2380        g_notempty = 0;        g_notempty = 0;
2381        if (offsets[0] == offsets[1])  
2382          if (use_offsets[0] == use_offsets[1])
2383          {          {
2384          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
2385          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2386          }          }
2387    
2388        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
2389    
2390        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
2391    
2392        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
2393    
2394        else        else
2395          {          {
2396          bptr += offsets[1];          bptr += use_offsets[1];
2397          len -= offsets[1];          len -= use_offsets[1];
2398          }          }
2399        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2400    
2401        NEXT_DATA: continue;
2402      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2403    
2404    CONTINUE:    CONTINUE:
# Line 1011  while (!done) Line 2407  while (!done)
2407    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2408  #endif  #endif
2409    
2410    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2411    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2412    if (tables != NULL)    if (tables != NULL)
2413      {      {
2414      free((void *)tables);      new_free((void *)tables);
2415      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2416        locale_set = 0;
2417      }      }
2418    }    }
2419    
2420  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2421  return 0;  
2422    EXIT:
2423    
2424    if (infile != NULL && infile != stdin) fclose(infile);
2425    if (outfile != NULL && outfile != stdout) fclose(outfile);
2426    
2427    free(buffer);
2428    free(dbuffer);
2429    free(pbuffer);
2430    free(offsets);
2431    
2432    return yield;
2433  }  }
2434    
2435  /* End */  /* End of pcretest.c */

Legend:
Removed from v.41  
changed lines
  Added in v.289

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12