/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 31 by nigel, Sat Feb 24 21:38:57 2007 UTC revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    
52    /* A number of things vary for Windows builds. Originally, pcretest opened its
53    input and output without "b"; then I was told that "b" was needed in some
54    environments, so it was added for release 5.0 to both the input and output. (It
55    makes no difference on Unix-like systems.) Later I was told that it is wrong
56    for the input on Windows. I've now abstracted the modes into two macros that
57    are set here, to make it easier to fiddle with them, and removed "b" from the
58    input mode under Windows. */
59    
60    #if defined(_WIN32) || defined(WIN32)
61    #include <io.h>                /* For _setmode() */
62    #include <fcntl.h>             /* For _O_BINARY */
63    #define INPUT_MODE   "r"
64    #define OUTPUT_MODE  "wb"
65    
66    #else
67    #include <sys/time.h>          /* These two includes are needed */
68    #include <sys/resource.h>      /* for setrlimit(). */
69    #define INPUT_MODE   "rb"
70    #define OUTPUT_MODE  "wb"
71    #endif
72    
73    
74    /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76    internal macros, structures, and other internal data values; pcretest has
77    "inside information" compared to a program that strictly follows the PCRE API.
78    
79    Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84    #include "pcre_internal.h"
85    
86  /* Use the internal info for displaying the results of pcre_study(). */  /* We need access to the data tables that PCRE uses. So as not to have to keep
87    two copies, we include the source file here, changing the names of the external
88    symbols to prevent clashes. */
89    
90  #include "internal.h"  #define _pcre_utf8_table1      utf8_table1
91    #define _pcre_utf8_table1_size utf8_table1_size
92    #define _pcre_utf8_table2      utf8_table2
93    #define _pcre_utf8_table3      utf8_table3
94    #define _pcre_utf8_table4      utf8_table4
95    #define _pcre_utt              utt
96    #define _pcre_utt_size         utt_size
97    #define _pcre_OP_lengths       OP_lengths
98    
99    #include "pcre_tables.c"
100    
101    /* We also need the pcre_printint() function for printing out compiled
102    patterns. This function is in a separate file so that it can be included in
103    pcre_compile.c when that module is compiled with debugging enabled.
104    
105    The definition of the macro PRINTABLE, which determines whether to print an
106    output character as-is or as a hex value when showing compiled patterns, is
107    contained in this file. We use it here also, in cases when the locale has not
108    been explicitly changed, so as to get consistent output from systems that
109    differ in their output from isprint() even in the "C" locale. */
110    
111    #include "pcre_printint.src"
112    
113    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114    
115    
116    /* It is possible to compile this test program without including support for
117    testing the POSIX interface, though this is not available via the standard
118    Makefile. */
119    
120    #if !defined NOPOSIX
121  #include "pcreposix.h"  #include "pcreposix.h"
122    #endif
123    
124    /* It is also possible, for the benefit of the version currently imported into
125    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126    interface to the DFA matcher (NODFA), and without the doublecheck of the old
127    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137    /* Other parameters */
138    
139  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
140  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 144 
144  #endif  #endif
145  #endif  #endif
146    
147  #define LOOPREPEAT 20000  /* This is the default loop count for timing. */
148    
149    #define LOOPREPEAT 500000
150    
151    /* Static variables */
152    
153  static FILE *outfile;  static FILE *outfile;
154  static int log_store = 0;  static int log_store = 0;
155    static int callout_count;
156    static int callout_extra;
157    static int callout_fail_count;
158    static int callout_fail_id;
159    static int first_callout;
160    static int locale_set = 0;
161    static int show_malloc;
162    static int use_utf8;
163    static size_t gotten_store;
164    
165    /* The buffers grow automatically if very long input lines are encountered. */
166    
167    static int buffer_size = 50000;
168    static uschar *buffer = NULL;
169    static uschar *dbuffer = NULL;
170    static uschar *pbuffer = NULL;
171    
172    
173    
174  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
175  code as contained in pcre.c under the DEBUG macro. */  *        Read or extend an input line            *
176    *************************************************/
177    
178  static const char *OP_names[] = {  /* Input lines are read into buffer, but both patterns and data lines can be
179    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  continued over multiple input lines. In addition, if the buffer fills up, we
180    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  want to automatically expand it so as to be able to handle extremely large
181    "Opt", "^", "$", "Any", "chars", "not",  lines that are needed for certain stress tests. When the input buffer is
182    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  expanded, the other two buffers must also be expanded likewise, and the
183    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  contents of pbuffer, which are a copy of the input for callouts, must be
184    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  preserved (for when expansion happens for a data line). This is not the most
185    "*", "*?", "+", "+?", "?", "??", "{", "{",  optimal way of handling this, but hey, this is just a test program!
186    "class", "Ref",  
187    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  Arguments:
188    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    f            the file to read
189    "Brazero", "Braminzero", "Bra"    start        where in buffer to start (this *must* be within buffer)
190  };  
191    Returns:       pointer to the start of new data
192                   could be a copy of start, or could be moved
193  static void print_internals(pcre *re, FILE *outfile)                 NULL if no data read and EOF reached
194  {  */
 unsigned char *code = ((real_pcre *)re)->code;  
   
 fprintf(outfile, "------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
   
   if (*code >= OP_BRA)  
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
195    
196          case OP_CRRANGE:  static uschar *
197          case OP_CRMINRANGE:  extend_inputline(FILE *f, uschar *start)
198          min = (code[1] << 8) + code[2];  {
199          max = (code[3] << 8) + code[4];  uschar *here = start;
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
200    
201          default:  for (;;)
202          code--;    {
203          }    int rlen = buffer_size - (here - buffer);
204    
205      if (rlen > 1000)
206        {
207        int dlen;
208        if (fgets((char *)here, rlen,  f) == NULL)
209          return (here == start)? NULL : start;
210        dlen = (int)strlen((char *)here);
211        if (dlen > 0 && here[dlen - 1] == '\n') return start;
212        here += dlen;
213        }
214    
215      else
216        {
217        int new_buffer_size = 2*buffer_size;
218        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
219        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
220        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
221    
222        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
223          {
224          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
225          exit(1);
226        }        }
     break;  
227    
228      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
229        memcpy(new_pbuffer, pbuffer, buffer_size);
230    
231        buffer_size = new_buffer_size;
232    
233        start = new_buffer + (start - buffer);
234        here = new_buffer + (here - buffer);
235    
236      default:      free(buffer);
237      fprintf(outfile, "    %s", OP_names[*code]);      free(dbuffer);
238      break;      free(pbuffer);
239    
240        buffer = new_buffer;
241        dbuffer = new_dbuffer;
242        pbuffer = new_pbuffer;
243      }      }
244      }
245    
246    return NULL;  /* Control never gets here */
247    }
248    
249    
250    
251    code++;  
252    fprintf(outfile, "\n");  
253    
254    
255    /*************************************************
256    *          Read number from string               *
257    *************************************************/
258    
259    /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
260    around with conditional compilation, just do the job by hand. It is only used
261    for unpicking arguments, so just keep it simple.
262    
263    Arguments:
264      str           string to be converted
265      endptr        where to put the end pointer
266    
267    Returns:        the value, as an int
268    */
269    
static int
get_value(unsigned char *str, unsigned char **endptr)
{
unsigned char *cursor = str;
int total = 0;

/* Step over any leading white space, stopping at the terminating zero. */

for (; *cursor != 0 && isspace(*cursor); cursor++)
  ;

/* Accumulate decimal digits; the scan stops at the first non-digit. */

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

/* Tell the caller where the scan stopped. If there were no digits this is
just past the white space and the result is zero. */

*endptr = cursor;
return total;
}
279    
280    
281    
282    
283    /*************************************************
284    *            Convert UTF-8 string to value       *
285    *************************************************/
286    
287    /* This function takes one or more bytes that represents a UTF-8 character,
288    and returns the value of the character.
289    
290    Argument:
291      utf8bytes   a pointer to the byte vector
292      vptr        a pointer to an int to receive the value
293    
294    Returns:      >  0 => the number of bytes consumed
295                  -6 to 0 => malformed UTF-8 character at offset = (-return)
296    */
297    
298    #if !defined NOUTF8
299    
300    static int
301    utf82ord(unsigned char *utf8bytes, int *vptr)
302    {
303    int c = *utf8bytes++;
304    int d = c;
305    int i, j, s;
306    
307    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
308      {
309      if ((d & 0x80) == 0) break;
310      d <<= 1;
311      }
312    
313    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
314    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
315    
316    /* i now has a value in the range 1-5 */
317    
318    s = 6*i;
319    d = (c & utf8_table3[i]) << s;
320    
321    for (j = 0; j < i; j++)
322      {
323      c = *utf8bytes++;
324      if ((c & 0xc0) != 0x80) return -(j+1);
325      s -= 6;
326      d |= (c & 0x3f) << s;
327    }    }
328    
329    /* Check that encoding was the correct unique one */
330    
331    for (j = 0; j < utf8_table1_size; j++)
332      if (d <= utf8_table1[j]) break;
333    if (j != i) return -(i+1);
334    
335    /* Valid value */
336    
337    *vptr = d;
338    return i+1;
339  }  }
340    
341    #endif
342    
343    
344    
345    /*************************************************
346    *       Convert character value to UTF-8         *
347    *************************************************/
348    
349    /* This function takes an integer value in the range 0 - 0x7fffffff
350    and encodes it as a UTF-8 character in 1 to 6 bytes.
351    
352    Arguments:
353      cvalue     the character value
354      utf8bytes  pointer to buffer for result - at least 6 bytes long
355    
356    Returns:     number of characters placed in the buffer
357    */
358    
359    #if !defined NOUTF8
360    
361    static int
362    ord2utf8(int cvalue, uschar *utf8bytes)
363    {
364    register int i, j;
365    for (i = 0; i < utf8_table1_size; i++)
366      if (cvalue <= utf8_table1[i]) break;
367    utf8bytes += i;
368    for (j = i; j > 0; j--)
369     {
370     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
371     cvalue >>= 6;
372     }
373    *utf8bytes = utf8_table2[i] | cvalue;
374    return i + 1;
375    }
376    
377    #endif
378    
379    
380    
381    /*************************************************
382    *             Print character string             *
383    *************************************************/
384    
385  /* Character string printing function. */  /* Character string printing function. Must handle UTF-8 strings in utf8
386    mode. Yields number of characters printed. If handed a NULL file, just counts
387    chars without printing. */
388    
389  static void pchars(unsigned char *p, int length)  static int pchars(unsigned char *p, int length, FILE *f)
390  {  {
391  int c;  int c = 0;
392    int yield = 0;
393    
394  while (length-- > 0)  while (length-- > 0)
395    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
396      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
397      if (use_utf8)
398        {
399        int rc = utf82ord(p, &c);
400    
401        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
402          {
403          length -= rc - 1;
404          p += rc;
405          if (PRINTHEX(c))
406            {
407            if (f != NULL) fprintf(f, "%c", c);
408            yield++;
409            }
410          else
411            {
412            int n = 4;
413            if (f != NULL) fprintf(f, "\\x{%02x}", c);
414            yield += (n <= 0x000000ff)? 2 :
415                     (n <= 0x00000fff)? 3 :
416                     (n <= 0x0000ffff)? 4 :
417                     (n <= 0x000fffff)? 5 : 6;
418            }
419          continue;
420          }
421        }
422    #endif
423    
424       /* Not UTF-8, or malformed UTF-8  */
425    
426      c = *p++;
427      if (PRINTHEX(c))
428        {
429        if (f != NULL) fprintf(f, "%c", c);
430        yield++;
431        }
432      else
433        {
434        if (f != NULL) fprintf(f, "\\x%02x", c);
435        yield += 4;
436        }
437      }
438    
439    return yield;
440    }
441    
442    
443    
444    /*************************************************
445    *              Callout function                  *
446    *************************************************/
447    
448    /* Called from PCRE as a result of the (?C) item. We print out where we are in
449    the match. Yield zero unless more callouts than the fail count, or the callout
450    data is not zero. */
451    
452    static int callout(pcre_callout_block *cb)
453    {
454    FILE *f = (first_callout | callout_extra)? outfile : NULL;
455    int i, pre_start, post_start, subject_length;
456    
457    if (callout_extra)
458      {
459      fprintf(f, "Callout %d: last capture = %d\n",
460        cb->callout_number, cb->capture_last);
461    
462      for (i = 0; i < cb->capture_top * 2; i += 2)
463        {
464        if (cb->offset_vector[i] < 0)
465          fprintf(f, "%2d: <unset>\n", i/2);
466        else
467          {
468          fprintf(f, "%2d: ", i/2);
469          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
470            cb->offset_vector[i+1] - cb->offset_vector[i], f);
471          fprintf(f, "\n");
472          }
473        }
474      }
475    
476    /* Re-print the subject in canonical form, the first time or if giving full
477    datails. On subsequent calls in the same match, we use pchars just to find the
478    printed lengths of the substrings. */
479    
480    if (f != NULL) fprintf(f, "--->");
481    
482    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
483    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
484      cb->current_position - cb->start_match, f);
485    
486    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
487    
488    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
489      cb->subject_length - cb->current_position, f);
490    
491    if (f != NULL) fprintf(f, "\n");
492    
493    /* Always print appropriate indicators, with callout number if not already
494    shown. For automatic callouts, show the pattern offset. */
495    
496    if (cb->callout_number == 255)
497      {
498      fprintf(outfile, "%+3d ", cb->pattern_position);
499      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
500      }
501    else
502      {
503      if (callout_extra) fprintf(outfile, "    ");
504        else fprintf(outfile, "%3d ", cb->callout_number);
505      }
506    
507    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
508    fprintf(outfile, "^");
509    
510    if (post_start > 0)
511      {
512      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
513      fprintf(outfile, "^");
514      }
515    
516    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
517      fprintf(outfile, " ");
518    
519    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
520      pbuffer + cb->pattern_position);
521    
522    fprintf(outfile, "\n");
523    first_callout = 0;
524    
525    if (cb->callout_data != NULL)
526      {
527      int callout_data = *((int *)(cb->callout_data));
528      if (callout_data != 0)
529        {
530        fprintf(outfile, "Callout data = %d\n", callout_data);
531        return callout_data;
532        }
533      }
534    
535    return (cb->callout_number != callout_fail_id)? 0 :
536           (++callout_count >= callout_fail_count)? 1 : 0;
537  }  }
538    
539    
540    /*************************************************
541    *            Local malloc functions              *
542    *************************************************/
543    
544  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
545  compiled re. */  compiled re. */
546    
547  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
548  {  {
549  if (log_store)  void *block = malloc(size);
550    fprintf(outfile, "Memory allocation request: %d (code space %d)\n",  gotten_store = size;
551      (int)size, (int)size - offsetof(real_pcre, code[0]));  if (show_malloc)
552  return malloc(size);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
553    return block;
554    }
555    
556    static void new_free(void *block)
557    {
558    if (show_malloc)
559      fprintf(outfile, "free             %p\n", block);
560    free(block);
561    }
562    
563    
564    /* For recursion malloc/free, to test stacking calls */
565    
566    static void *stack_malloc(size_t size)
567    {
568    void *block = malloc(size);
569    if (show_malloc)
570      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
571    return block;
572    }
573    
574    static void stack_free(void *block)
575    {
576    if (show_malloc)
577      fprintf(outfile, "stack_free       %p\n", block);
578    free(block);
579  }  }
580    
581    
582    /*************************************************
583    *          Call pcre_fullinfo()                  *
584    *************************************************/
585    
586    /* Get one piece of information from the pcre_fullinfo() function */
587    
588    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
589    {
590    int rc;
591    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
592      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
593    }
594    
595    
596    
597    /*************************************************
598    *         Byte flipping function                 *
599    *************************************************/
600    
static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0, b1, b2, b3;

/* Two-byte case: exchange the two low-order bytes; anything above them is
discarded. */

if (n == 2)
  {
  unsigned long int low = value & 0x00ff;
  unsigned long int high = value & 0xff00;
  return (low << 8) | (high >> 8);
  }

/* Any other n is handled as a four-byte quantity: reverse the order of the
four low-order bytes. */

b0 = (value & 0x000000ff) << 24;
b1 = (value & 0x0000ff00) <<  8;
b2 = (value & 0x00ff0000) >>  8;
b3 = (value & 0xff000000) >> 24;

return b0 | b1 | b2 | b3;
}
610    
611    
612    
613    
614    /*************************************************
615    *        Check match or recursion limit          *
616    *************************************************/
617    
618    static int
619    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
620      int start_offset, int options, int *use_offsets, int use_size_offsets,
621      int flag, unsigned long int *limit, int errnumber, const char *msg)
622    {
623    int count;
624    int min = 0;
625    int mid = 64;
626    int max = -1;
627    
628    extra->flags |= flag;
629    
630    for (;;)
631      {
632      *limit = mid;
633    
634      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
635        use_offsets, use_size_offsets);
636    
637      if (count == errnumber)
638        {
639        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
640        min = mid;
641        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
642        }
643    
644      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
645                             count == PCRE_ERROR_PARTIAL)
646        {
647        if (mid == min + 1)
648          {
649          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
650          break;
651          }
652        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
653        max = mid;
654        mid = (min + mid)/2;
655        }
656      else break;    /* Some other error */
657      }
658    
659    extra->flags &= ~flag;
660    return count;
661    }
662    
663    
664    
665    /*************************************************
666    *         Check newline indicator                *
667    *************************************************/
668    
669    /* This is used both at compile and run-time to check for <xxx> escapes, where
670    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
671    no match.
672    
673    Arguments:
674      p           points after the leading '<'
675      f           file for error message
676    
677    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
678    */
679    
680    static int
681    check_newline(uschar *p, FILE *f)
682    {
683    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
684    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
685    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
686    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
687    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
688    fprintf(f, "Unknown newline type at: <%s\n", p);
689    return 0;
690    }
691    
692    
693    
694    /*************************************************
695    *             Usage function                     *
696    *************************************************/
697    
static void
usage(void)
{
/* One line of help per option, written to the standard output. The -dfa and
-p lines appear only when the corresponding support is compiled in. */

fputs("Usage:     pcretest [options] [<input> [<output>]]\n", stdout);
fputs("  -b       show compiled code (bytecode)\n", stdout);
fputs("  -C       show PCRE compile-time options and exit\n", stdout);
fputs("  -d       debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n", stdout);
fputs("  -i       show information about compiled patterns\n", stdout);
fputs("  -m       output memory used information\n", stdout);
fputs("  -o <n>   set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n", stdout);
fputs("  -S <n>   set stack size to <n> megabytes\n", stdout);
fputs("  -s       output store (memory) used information\n", stdout);
fputs("  -t       time compilation and execution\n", stdout);
fputs("  -t <n>   time compilation and execution, repeating <n> times\n", stdout);
fputs("  -tm      time execution (matching) only\n", stdout);
fputs("  -tm <n>  time execution (matching) only, repeating <n> times\n", stdout);
}
723    
724    
725    
726    /*************************************************
727    *                Main Program                    *
728    *************************************************/
729    
730  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
731  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 293  int options = 0; Line 738  int options = 0;
738  int study_options = 0;  int study_options = 0;
739  int op = 1;  int op = 1;
740  int timeit = 0;  int timeit = 0;
741    int timeitm = 0;
742  int showinfo = 0;  int showinfo = 0;
743  int showstore = 0;  int showstore = 0;
744    int quiet = 0;
745    int size_offsets = 45;
746    int size_offsets_max;
747    int *offsets = NULL;
748    #if !defined NOPOSIX
749  int posix = 0;  int posix = 0;
750    #endif
751  int debug = 0;  int debug = 0;
752  int done = 0;  int done = 0;
753  unsigned char buffer[30000];  int all_use_dfa = 0;
754  unsigned char dbuffer[1024];  int yield = 0;
755    int stack_size;
756    
757    /* These vectors store, end-to-end, a list of captured substring names. Assume
758    that 1024 is plenty long enough for the few names we'll be testing. */
759    
760    uschar copynames[1024];
761    uschar getnames[1024];
762    
763  /* Static so that new_malloc can use it. */  uschar *copynamesptr;
764    uschar *getnamesptr;
765    
766    /* Get buffers from malloc() so that Electric Fence will check their misuse
767    when I am debugging. They grow automatically when very long lines are read. */
768    
769    buffer = (unsigned char *)malloc(buffer_size);
770    dbuffer = (unsigned char *)malloc(buffer_size);
771    pbuffer = (unsigned char *)malloc(buffer_size);
772    
773    /* The outfile variable is static so that new_malloc can use it. */
774    
775  outfile = stdout;  outfile = stdout;
776    
777    /* The following  _setmode() stuff is some Windows magic that tells its runtime
778    library to translate CRLF into a single LF character. At least, that's what
779    I've been told: never having used Windows I take this all on trust. Originally
780    it set 0x8000, but then I was advised that _O_BINARY was better. */
781    
782    #if defined(_WIN32) || defined(WIN32)
783    _setmode( _fileno( stdout ), _O_BINARY );
784    #endif
785    
786  /* Scan options */  /* Scan options */
787    
788  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
789    {    {
790      unsigned char *endptr;
791    
792    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
793      showstore = 1;      showstore = 1;
794    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
795      else if (strcmp(argv[op], "-b") == 0) debug = 1;
796    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
797    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
798    #if !defined NODFA
799      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
800    #endif
801      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
802          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
803            *endptr == 0))
804        {
805        op++;
806        argc--;
807        }
808      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
809        {
810        int both = argv[op][2] == 0;
811        int temp;
812        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
813                         *endptr == 0))
814          {
815          timeitm = temp;
816          op++;
817          argc--;
818          }
819        else timeitm = LOOPREPEAT;
820        if (both) timeit = timeitm;
821        }
822      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
823          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
824            *endptr == 0))
825        {
826    #if defined(_WIN32) || defined(WIN32)
827        printf("PCRE: -S not supported on this OS\n");
828        exit(1);
829    #else
830        int rc;
831        struct rlimit rlim;
832        getrlimit(RLIMIT_STACK, &rlim);
833        rlim.rlim_cur = stack_size * 1024 * 1024;
834        rc = setrlimit(RLIMIT_STACK, &rlim);
835        if (rc != 0)
836          {
837        printf("PCRE: setrlimit() failed with error %d\n", rc);
838        exit(1);
839          }
840        op++;
841        argc--;
842    #endif
843        }
844    #if !defined NOPOSIX
845    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
846    #endif
847      else if (strcmp(argv[op], "-C") == 0)
848        {
849        int rc;
850        printf("PCRE version %s\n", pcre_version());
851        printf("Compiled with\n");
852        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
853        printf("  %sUTF-8 support\n", rc? "" : "No ");
854        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
855        printf("  %sUnicode properties support\n", rc? "" : "No ");
856        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
857        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
858          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
859          (rc == -2)? "ANYCRLF" :
860          (rc == -1)? "ANY" : "???");
861        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
862        printf("  Internal link size = %d\n", rc);
863        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
864        printf("  POSIX malloc threshold = %d\n", rc);
865        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
866        printf("  Default match limit = %d\n", rc);
867        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
868        printf("  Default recursion depth limit = %d\n", rc);
869        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
870        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
871        goto EXIT;
872        }
873      else if (strcmp(argv[op], "-help") == 0 ||
874               strcmp(argv[op], "--help") == 0)
875        {
876        usage();
877        goto EXIT;
878        }
879    else    else
880      {      {
881      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
882      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
883      printf("  -d   debug: show compiled code; implies -i\n"      yield = 1;
884             "  -i   show information about compiled pattern\n"      goto EXIT;
            "  -p   use POSIX interface\n"  
            "  -s   output store information\n"  
            "  -t   time compilation and execution\n");  
     return 1;  
885      }      }
886    op++;    op++;
887    argc--;    argc--;
888    }    }
889    
890    /* Get the store for the offsets vector, and remember what it was */
891    
892    size_offsets_max = size_offsets;
893    offsets = (int *)malloc(size_offsets_max * sizeof(int));
894    if (offsets == NULL)
895      {
896      printf("** Failed to get %d bytes of memory for offsets vector\n",
897        (int)(size_offsets_max * sizeof(int)));
898      yield = 1;
899      goto EXIT;
900      }
901    
902  /* Sort out the input and output files */  /* Sort out the input and output files */
903    
904  if (argc > 1)  if (argc > 1)
905    {    {
906    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
907    if (infile == NULL)    if (infile == NULL)
908      {      {
909      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
910      return 1;      yield = 1;
911        goto EXIT;
912      }      }
913    }    }
914    
915  if (argc > 2)  if (argc > 2)
916    {    {
917    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
918    if (outfile == NULL)    if (outfile == NULL)
919      {      {
920      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
921      return 1;      yield = 1;
922        goto EXIT;
923      }      }
924    }    }
925    
926  /* Set alternative malloc function */  /* Set alternative malloc function */
927    
928  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
929    pcre_free = new_free;
930    pcre_stack_malloc = stack_malloc;
931    pcre_stack_free = stack_free;
932    
933  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
934    
935  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
936    
937  /* Main loop */  /* Main loop */
938    
# Line 366  while (!done) Line 940  while (!done)
940    {    {
941    pcre *re = NULL;    pcre *re = NULL;
942    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
943    
944    #if !defined NOPOSIX  /* There are still compilers that require no indent */
945    regex_t preg;    regex_t preg;
946      int do_posix = 0;
947    #endif
948    
949    const char *error;    const char *error;
950    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
951    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
952      const unsigned char *tables = NULL;
953      unsigned long int true_size, true_study_size = 0;
954      size_t size, regex_gotten_store;
955    int do_study = 0;    int do_study = 0;
956    int do_debug = debug;    int do_debug = debug;
957      int debug_lengths = 1;
958      int do_G = 0;
959      int do_g = 0;
960    int do_showinfo = showinfo;    int do_showinfo = showinfo;
961    int do_posix = 0;    int do_showrest = 0;
962    int erroroffset, len, delimiter;    int do_flip = 0;
963      int erroroffset, len, delimiter, poffset;
964    
965      use_utf8 = 0;
966    
967    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
968    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
969    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
970      fflush(outfile);
971    
972    p = buffer;    p = buffer;
973    while (isspace(*p)) p++;    while (isspace(*p)) p++;
974    if (*p == 0) continue;    if (*p == 0) continue;
975    
976    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
977    complete, read more. */  
978      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
979        {
980        unsigned long int magic, get_options;
981        uschar sbuf[8];
982        FILE *f;
983    
984        p++;
985        pp = p + (int)strlen((char *)p);
986        while (isspace(pp[-1])) pp--;
987        *pp = 0;
988    
989        f = fopen((char *)p, "rb");
990        if (f == NULL)
991          {
992          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
993          continue;
994          }
995    
996        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
997    
998        true_size =
999          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1000        true_study_size =
1001          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1002    
1003        re = (real_pcre *)new_malloc(true_size);
1004        regex_gotten_store = gotten_store;
1005    
1006        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1007    
1008        magic = ((real_pcre *)re)->magic_number;
1009        if (magic != MAGIC_NUMBER)
1010          {
1011          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1012            {
1013            do_flip = 1;
1014            }
1015          else
1016            {
1017            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1018            fclose(f);
1019            continue;
1020            }
1021          }
1022    
1023        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1024          do_flip? " (byte-inverted)" : "", p);
1025    
1026        /* Need to know if UTF-8 for printing data strings */
1027    
1028        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1029        use_utf8 = (get_options & PCRE_UTF8) != 0;
1030    
1031        /* Now see if there is any following study data */
1032    
1033        if (true_study_size != 0)
1034          {
1035          pcre_study_data *psd;
1036    
1037          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1038          extra->flags = PCRE_EXTRA_STUDY_DATA;
1039    
1040          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1041          extra->study_data = psd;
1042    
1043          if (fread(psd, 1, true_study_size, f) != true_study_size)
1044            {
1045            FAIL_READ:
1046            fprintf(outfile, "Failed to read data from %s\n", p);
1047            if (extra != NULL) new_free(extra);
1048            if (re != NULL) new_free(re);
1049            fclose(f);
1050            continue;
1051            }
1052          fprintf(outfile, "Study data loaded from %s\n", p);
1053          do_study = 1;     /* To get the data output if requested */
1054          }
1055        else fprintf(outfile, "No study data\n");
1056    
1057        fclose(f);
1058        goto SHOW_INFO;
1059        }
1060    
1061      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1062      the pattern; if it isn't complete, read more. */
1063    
1064    delimiter = *p++;    delimiter = *p++;
1065    
# Line 396  while (!done) Line 1070  while (!done)
1070      }      }
1071    
1072    pp = p;    pp = p;
1073      poffset = p - buffer;
1074    
1075    for(;;)    for(;;)
1076      {      {
# Line 406  while (!done) Line 1081  while (!done)
1081        pp++;        pp++;
1082        }        }
1083      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1084      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1085      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1086        {        {
1087        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1088        done = 1;        done = 1;
# Line 424  while (!done) Line 1091  while (!done)
1091      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1092      }      }
1093    
1094      /* The buffer may have moved while being extended; reset the start of data
1095      pointer to the correct relative point in the buffer. */
1096    
1097      p = buffer + poffset;
1098    
1099    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1100    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1101    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1102    
1103    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1104    
1105    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1106      for callouts. */
1107    
1108    *pp++ = 0;    *pp++ = 0;
1109      strcpy((char *)pbuffer, (char *)p);
1110    
1111    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1112    
# Line 444  while (!done) Line 1118  while (!done)
1118      {      {
1119      switch (*pp++)      switch (*pp++)
1120        {        {
1121          case 'f': options |= PCRE_FIRSTLINE; break;
1122          case 'g': do_g = 1; break;
1123        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1124        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1125        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1126        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1127    
1128          case '+': do_showrest = 1; break;
1129        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1130          case 'B': do_debug = 1; break;
1131          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1132        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1133        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1134          case 'F': do_flip = 1; break;
1135          case 'G': do_G = 1; break;
1136        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1137          case 'J': options |= PCRE_DUPNAMES; break;
1138        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1139          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1140    
1141    #if !defined NOPOSIX
1142        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1143    #endif
1144    
1145        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1146        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1147        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1148          case 'Z': debug_lengths = 0; break;
1149          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1150          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1151    
1152        case 'L':        case 'L':
1153        ppp = pp;        ppp = pp;
1154        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1155          /* The '0' test is just in case this is an unterminated line. */
1156          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1157        *ppp = 0;        *ppp = 0;
1158        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1159          {          {
1160          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1161          goto SKIP_DATA;          goto SKIP_DATA;
1162          }          }
1163          locale_set = 1;
1164        tables = pcre_maketables();        tables = pcre_maketables();
1165        pp = ppp;        pp = ppp;
1166        break;        break;
1167    
1168        case '\n': case ' ': break;        case '>':
1169          to_file = pp;
1170          while (*pp != 0) pp++;
1171          while (isspace(pp[-1])) pp--;
1172          *pp = 0;
1173          break;
1174    
1175          case '<':
1176            {
1177            int x = check_newline(pp, outfile);
1178            if (x == 0) goto SKIP_DATA;
1179            options |= x;
1180            while (*pp++ != '>');
1181            }
1182          break;
1183    
1184          case '\r':                      /* So that it works in Windows */
1185          case '\n':
1186          case ' ':
1187          break;
1188    
1189        default:        default:
1190        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1191        goto SKIP_DATA;        goto SKIP_DATA;
# Line 483  while (!done) Line 1196  while (!done)
1196    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
1197    local character tables. */    local character tables. */
1198    
1199    #if !defined NOPOSIX
1200    if (posix || do_posix)    if (posix || do_posix)
1201      {      {
1202      int rc;      int rc;
1203      int cflags = 0;      int cflags = 0;
1204    
1205      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1206      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1207        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1208        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1209        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1210    
1211      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1212    
1213      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 496  while (!done) Line 1215  while (!done)
1215    
1216      if (rc != 0)      if (rc != 0)
1217        {        {
1218        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1219        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1220        goto SKIP_DATA;        goto SKIP_DATA;
1221        }        }
# Line 505  while (!done) Line 1224  while (!done)
1224    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1225    
1226    else    else
1227    #endif  /* !defined NOPOSIX */
1228    
1229      {      {
1230      if (timeit)      if (timeit > 0)
1231        {        {
1232        register int i;        register int i;
1233        clock_t time_taken;        clock_t time_taken;
1234        clock_t start_time = clock();        clock_t start_time = clock();
1235        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1236          {          {
1237          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1238          if (re != NULL) free(re);          if (re != NULL) free(re);
1239          }          }
1240        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1241        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1242          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)timeit) /
1243          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
1244        }        }
1245    
1246      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 535  while (!done) Line 1256  while (!done)
1256          {          {
1257          for (;;)          for (;;)
1258            {            {
1259            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1260              {              {
1261              done = 1;              done = 1;
1262              goto CONTINUE;              goto CONTINUE;
# Line 546  while (!done) Line 1267  while (!done)
1267            }            }
1268          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1269          }          }
1270        goto CONTINUE;        goto CONTINUE;
1271        }        }
1272    
1273        /* Compilation succeeded; print data if required. There are now two
1274        info-returning functions. The old one has a limited interface and
1275        returns only limited data. Check that it agrees with the newer one. */
1276    
1277        if (log_store)
1278          fprintf(outfile, "Memory allocation (code space): %d\n",
1279            (int)(gotten_store -
1280                  sizeof(real_pcre) -
1281                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1282    
1283        /* Extract the size for possible writing before possibly flipping it,
1284        and remember the store that was got. */
1285    
1286        true_size = ((real_pcre *)re)->size;
1287        regex_gotten_store = gotten_store;
1288    
1289        /* If /S was present, study the regexp to generate additional info to
1290        help with the matching. */
1291    
1292        if (do_study)
1293          {
1294          if (timeit > 0)
1295            {
1296            register int i;
1297            clock_t time_taken;
1298            clock_t start_time = clock();
1299            for (i = 0; i < timeit; i++)
1300              extra = pcre_study(re, study_options, &error);
1301            time_taken = clock() - start_time;
1302            if (extra != NULL) free(extra);
1303            fprintf(outfile, "  Study time %.4f milliseconds\n",
1304              (((double)time_taken * 1000.0) / (double)timeit) /
1305                (double)CLOCKS_PER_SEC);
1306            }
1307          extra = pcre_study(re, study_options, &error);
1308          if (error != NULL)
1309            fprintf(outfile, "Failed to study: %s\n", error);
1310          else if (extra != NULL)
1311            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1312          }
1313    
1314        /* If the 'F' option was present, we flip the bytes of all the integer
1315        fields in the regex data block and the study block. This is to make it
1316        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1317        compiled on a different architecture. */
1318    
1319        if (do_flip)
1320          {
1321          real_pcre *rre = (real_pcre *)re;
1322          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1323          rre->size = byteflip(rre->size, sizeof(rre->size));
1324          rre->options = byteflip(rre->options, sizeof(rre->options));
1325          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1326          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1327          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1328          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1329          rre->name_table_offset = byteflip(rre->name_table_offset,
1330            sizeof(rre->name_table_offset));
1331          rre->name_entry_size = byteflip(rre->name_entry_size,
1332            sizeof(rre->name_entry_size));
1333          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1334    
1335          if (extra != NULL)
1336            {
1337            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1338            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1339            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1340            }
1341          }
1342    
1343        /* Extract information from the compiled data if required */
1344    
1345        SHOW_INFO:
1346    
1347        if (do_debug)
1348          {
1349          fprintf(outfile, "------------------------------------------------------------------\n");
1350          pcre_printint(re, outfile, debug_lengths);
1351          }
1352    
1353        if (do_showinfo)
1354          {
1355          unsigned long int get_options, all_options;
1356    #if !defined NOINFOCHECK
1357          int old_first_char, old_options, old_count;
1358    #endif
1359          int count, backrefmax, first_char, need_char, okpartial, jchanged;
1360          int nameentrysize, namecount;
1361          const uschar *nametable;
1362    
1363          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1364          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1365          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1366          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1367          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1368          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1369          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1370          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1371          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1372          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1373          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1374    
1375    #if !defined NOINFOCHECK
1376          old_count = pcre_info(re, &old_options, &old_first_char);
1377          if (count < 0) fprintf(outfile,
1378            "Error %d from pcre_info()\n", count);
1379          else
1380            {
1381            if (old_count != count) fprintf(outfile,
1382              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1383                old_count);
1384    
1385            if (old_first_char != first_char) fprintf(outfile,
1386              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1387                first_char, old_first_char);
1388    
1389            if (old_options != (int)get_options) fprintf(outfile,
1390              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1391                get_options, old_options);
1392            }
1393    #endif
1394    
1395          if (size != regex_gotten_store) fprintf(outfile,
1396            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1397            (int)size, (int)regex_gotten_store);
1398    
1399          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1400          if (backrefmax > 0)
1401            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1402    
1403          if (namecount > 0)
1404            {
1405            fprintf(outfile, "Named capturing subpatterns:\n");
1406            while (namecount-- > 0)
1407              {
1408              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1409                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1410                GET2(nametable, 0));
1411              nametable += nameentrysize;
1412              }
1413            }
1414    
1415          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1416    
1417          all_options = ((real_pcre *)re)->options;
1418          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1419    
1420          if (get_options == 0) fprintf(outfile, "No options\n");
1421            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1422              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1423              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1424              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1425              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1426              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1427              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1428              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1429              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1430              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1431              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1432              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1433              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1434              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1435    
1436          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1437    
1438          switch (get_options & PCRE_NEWLINE_BITS)
1439            {
1440            case PCRE_NEWLINE_CR:
1441            fprintf(outfile, "Forced newline sequence: CR\n");
1442            break;
1443    
1444            case PCRE_NEWLINE_LF:
1445            fprintf(outfile, "Forced newline sequence: LF\n");
1446            break;
1447    
1448            case PCRE_NEWLINE_CRLF:
1449            fprintf(outfile, "Forced newline sequence: CRLF\n");
1450            break;
1451    
1452            case PCRE_NEWLINE_ANYCRLF:
1453            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1454            break;
1455    
1456            case PCRE_NEWLINE_ANY:
1457            fprintf(outfile, "Forced newline sequence: ANY\n");
1458            break;
1459    
1460            default:
1461            break;
1462            }
1463    
1464      /* Compilation succeeded; print data if required */        if (first_char == -1)
1465            {
1466            fprintf(outfile, "First char at start or follows newline\n");
1467            }
1468          else if (first_char < 0)
1469            {
1470            fprintf(outfile, "No first char\n");
1471            }
1472          else
1473            {
1474            int ch = first_char & 255;
1475            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1476              "" : " (caseless)";
1477            if (PRINTHEX(ch))
1478              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1479            else
1480              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1481            }
1482    
1483      if (do_showinfo)        if (need_char < 0)
1484        {          {
1485        int first_char, count;          fprintf(outfile, "No need char\n");
1486            }
1487          else
1488            {
1489            int ch = need_char & 255;
1490            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1491              "" : " (caseless)";
1492            if (PRINTHEX(ch))
1493              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1494            else
1495              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1496            }
1497    
1498        if (do_debug) print_internals(re, outfile);        /* Don't output study size; at present it is in any case a fixed
1499          value, but it varies, depending on the computer architecture, and
1500          so messes up the test suite. (And with the /F option, it might be
1501          flipped.) */
1502    
1503        count = pcre_info(re, &options, &first_char);        if (do_study)
       if (count < 0) fprintf(outfile,  
         "Error %d while reading info\n", count);  
       else  
1504          {          {
1505          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (extra == NULL)
1506          if (options == 0) fprintf(outfile, "No options\n");            fprintf(outfile, "Study returned NULL\n");
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
1507          else          else
1508            {            {
1509            if (isprint(first_char))            uschar *start_bits = NULL;
1510              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1511    
1512              if (start_bits == NULL)
1513                fprintf(outfile, "No starting byte set\n");
1514            else            else
1515              fprintf(outfile, "First char = %d\n", first_char);              {
1516                int i;
1517                int c = 24;
1518                fprintf(outfile, "Starting byte set: ");
1519                for (i = 0; i < 256; i++)
1520                  {
1521                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1522                    {
1523                    if (c > 75)
1524                      {
1525                      fprintf(outfile, "\n  ");
1526                      c = 2;
1527                      }
1528                    if (PRINTHEX(i) && i != ' ')
1529                      {
1530                      fprintf(outfile, "%c ", i);
1531                      c += 2;
1532                      }
1533                    else
1534                      {
1535                      fprintf(outfile, "\\x%02x ", i);
1536                      c += 5;
1537                      }
1538                    }
1539                  }
1540                fprintf(outfile, "\n");
1541                }
1542            }            }
1543          }          }
1544        }        }
1545    
1546      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1547      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1548        the study length, in big-endian order. */
1549    
1550      if (do_study)      if (to_file != NULL)
1551        {        {
1552        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1553          if (f == NULL)
1554          {          {
1555          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           ((double)time_taken * 1000.0)/  
           ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));  
1556          }          }
1557          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (do_showinfo)  
1558          {          {
1559          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1560          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (true_size >> 24)  & 255;
1561            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1562            sbuf[2] = (true_size >>  8)  & 255;
1563            sbuf[3] = (true_size)  & 255;
1564    
1565            sbuf[4] = (true_study_size >> 24)  & 255;
1566            sbuf[5] = (true_study_size >> 16)  & 255;
1567            sbuf[6] = (true_study_size >>  8)  & 255;
1568            sbuf[7] = (true_study_size)  & 255;
1569    
1570            if (fwrite(sbuf, 1, 8, f) < 8 ||
1571                fwrite(re, 1, true_size, f) < true_size)
1572              {
1573              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1574              }
1575          else          else
1576            {            {
1577            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1578            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1579              {              {
1580              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1581                    true_study_size)
1582                {                {
1583                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1584                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1585                }                }
1586                else fprintf(outfile, "Study data written to %s\n", to_file);
1587    
1588              }              }
           fprintf(outfile, "\n");  
1589            }            }
1590            fclose(f);
1591          }          }
1592    
1593          new_free(re);
1594          if (extra != NULL) new_free(extra);
1595          if (tables != NULL) new_free((void *)tables);
1596          continue;  /* With next regex */
1597        }        }
1598      }      }        /* End of non-POSIX compile */
1599    
1600    /* Read data lines and test them */    /* Read data lines and test them */
1601    
1602    for (;;)    for (;;)
1603      {      {
1604      unsigned char *q;      uschar *q;
1605        uschar *bptr;
1606        int *use_offsets = offsets;
1607        int use_size_offsets = size_offsets;
1608        int callout_data = 0;
1609        int callout_data_set = 0;
1610      int count, c;      int count, c;
1611      int copystrings = 0;      int copystrings = 0;
1612        int find_match_limit = 0;
1613      int getstrings = 0;      int getstrings = 0;
1614      int getlist = 0;      int getlist = 0;
1615      int offsets[45];      int gmatched = 0;
1616      int size_offsets = sizeof(offsets)/sizeof(int);      int start_offset = 0;
1617        int g_notempty = 0;
1618        int use_dfa = 0;
1619    
1620      options = 0;      options = 0;
1621    
1622      if (infile == stdin) printf("  data> ");      *copynames = 0;
1623      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      *getnames = 0;
1624    
1625        copynamesptr = copynames;
1626        getnamesptr = getnames;
1627    
1628        pcre_callout = callout;
1629        first_callout = 1;
1630        callout_extra = 0;
1631        callout_count = 0;
1632        callout_fail_count = 999999;
1633        callout_fail_id = -1;
1634        show_malloc = 0;
1635    
1636        if (extra != NULL) extra->flags &=
1637          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1638    
1639        len = 0;
1640        for (;;)
1641        {        {
1642        done = 1;        if (infile == stdin) printf("data> ");
1643        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1644            {
1645            if (len > 0) break;
1646            done = 1;
1647            goto CONTINUE;
1648            }
1649          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1650          len = (int)strlen((char *)buffer);
1651          if (buffer[len-1] == '\n') break;
1652        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1653    
     len = (int)strlen((char *)buffer);  
1654      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1655      buffer[len] = 0;      buffer[len] = 0;
1656      if (len == 0) break;      if (len == 0) break;
# Line 686  while (!done) Line 1658  while (!done)
1658      p = buffer;      p = buffer;
1659      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1660    
1661      q = dbuffer;      bptr = q = dbuffer;
1662      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1663        {        {
1664        int i = 0;        int i = 0;
1665        int n = 0;        int n = 0;
1666    
1667        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1668          {          {
1669          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 707  while (!done) Line 1680  while (!done)
1680          c -= '0';          c -= '0';
1681          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1682            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1683    
1684    #if !defined NOUTF8
1685            if (use_utf8 && c > 255)
1686              {
1687              unsigned char buff8[8];
1688              int ii, utn;
1689              utn = ord2utf8(c, buff8);
1690              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1691              c = buff8[ii];   /* Last byte */
1692              }
1693    #endif
1694          break;          break;
1695    
1696          case 'x':          case 'x':
1697    
1698            /* Handle \x{..} specially - new Perl thing for utf8 */
1699    
1700    #if !defined NOUTF8
1701            if (*p == '{')
1702              {
1703              unsigned char *pt = p;
1704              c = 0;
1705              while (isxdigit(*(++pt)))
1706                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1707              if (*pt == '}')
1708                {
1709                unsigned char buff8[8];
1710                int ii, utn;
1711                utn = ord2utf8(c, buff8);
1712                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1713                c = buff8[ii];   /* Last byte */
1714                p = pt + 1;
1715                break;
1716                }
1717              /* Not correct form; fall through */
1718              }
1719    #endif
1720    
1721            /* Ordinary \x */
1722    
1723          c = 0;          c = 0;
1724          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1725            {            {
# Line 718  while (!done) Line 1728  while (!done)
1728            }            }
1729          break;          break;
1730    
1731          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1732          p--;          p--;
1733          continue;          continue;
1734    
1735            case '>':
1736            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1737            continue;
1738    
1739          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1740          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1741          continue;          continue;
# Line 731  while (!done) Line 1745  while (!done)
1745          continue;          continue;
1746    
1747          case 'C':          case 'C':
1748          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1749          copystrings |= 1 << n;            {
1750              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1751              copystrings |= 1 << n;
1752              }
1753            else if (isalnum(*p))
1754              {
1755              uschar *npp = copynamesptr;
1756              while (isalnum(*p)) *npp++ = *p++;
1757              *npp++ = 0;
1758              *npp = 0;
1759              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1760              if (n < 0)
1761                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1762              copynamesptr = npp;
1763              }
1764            else if (*p == '+')
1765              {
1766              callout_extra = 1;
1767              p++;
1768              }
1769            else if (*p == '-')
1770              {
1771              pcre_callout = NULL;
1772              p++;
1773              }
1774            else if (*p == '!')
1775              {
1776              callout_fail_id = 0;
1777              p++;
1778              while(isdigit(*p))
1779                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1780              callout_fail_count = 0;
1781              if (*p == '!')
1782                {
1783                p++;
1784                while(isdigit(*p))
1785                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1786                }
1787              }
1788            else if (*p == '*')
1789              {
1790              int sign = 1;
1791              callout_data = 0;
1792              if (*(++p) == '-') { sign = -1; p++; }
1793              while(isdigit(*p))
1794                callout_data = callout_data * 10 + *p++ - '0';
1795              callout_data *= sign;
1796              callout_data_set = 1;
1797              }
1798            continue;
1799    
1800    #if !defined NODFA
1801            case 'D':
1802    #if !defined NOPOSIX
1803            if (posix || do_posix)
1804              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1805            else
1806    #endif
1807              use_dfa = 1;
1808            continue;
1809    
1810            case 'F':
1811            options |= PCRE_DFA_SHORTEST;
1812          continue;          continue;
1813    #endif
1814    
1815          case 'G':          case 'G':
1816          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1817          getstrings |= 1 << n;            {
1818              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1819              getstrings |= 1 << n;
1820              }
1821            else if (isalnum(*p))
1822              {
1823              uschar *npp = getnamesptr;
1824              while (isalnum(*p)) *npp++ = *p++;
1825              *npp++ = 0;
1826              *npp = 0;
1827              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1828              if (n < 0)
1829                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1830              getnamesptr = npp;
1831              }
1832          continue;          continue;
1833    
1834          case 'L':          case 'L':
1835          getlist = 1;          getlist = 1;
1836          continue;          continue;
1837    
1838            case 'M':
1839            find_match_limit = 1;
1840            continue;
1841    
1842            case 'N':
1843            options |= PCRE_NOTEMPTY;
1844            continue;
1845    
1846          case 'O':          case 'O':
1847          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1848          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1849              {
1850              size_offsets_max = n;
1851              free(offsets);
1852              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1853              if (offsets == NULL)
1854                {
1855                printf("** Failed to get %d bytes of memory for offsets vector\n",
1856                  (int)(size_offsets_max * sizeof(int)));
1857                yield = 1;
1858                goto EXIT;
1859                }
1860              }
1861            use_size_offsets = n;
1862            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1863            continue;
1864    
1865            case 'P':
1866            options |= PCRE_PARTIAL;
1867            continue;
1868    
1869            case 'Q':
1870            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871            if (extra == NULL)
1872              {
1873              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874              extra->flags = 0;
1875              }
1876            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1877            extra->match_limit_recursion = n;
1878            continue;
1879    
1880            case 'q':
1881            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1882            if (extra == NULL)
1883              {
1884              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1885              extra->flags = 0;
1886              }
1887            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1888            extra->match_limit = n;
1889            continue;
1890    
1891    #if !defined NODFA
1892            case 'R':
1893            options |= PCRE_DFA_RESTART;
1894            continue;
1895    #endif
1896    
1897            case 'S':
1898            show_malloc = 1;
1899          continue;          continue;
1900    
1901          case 'Z':          case 'Z':
1902          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1903          continue;          continue;
1904    
1905            case '?':
1906            options |= PCRE_NO_UTF8_CHECK;
1907            continue;
1908    
1909            case '<':
1910              {
1911              int x = check_newline(p, outfile);
1912              if (x == 0) goto NEXT_DATA;
1913              options |= x;
1914              while (*p++ != '>');
1915              }
1916            continue;
1917          }          }
1918        *q++ = c;        *q++ = c;
1919        }        }
1920      *q = 0;      *q = 0;
1921      len = q - dbuffer;      len = q - dbuffer;
1922    
1923        if ((all_use_dfa || use_dfa) && find_match_limit)
1924          {
1925          printf("**Match limit not relevant for DFA matching: ignored\n");
1926          find_match_limit = 0;
1927          }
1928    
1929      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1930      support timing. */      support timing or playing with the match limit or callout data. */
1931    
1932    #if !defined NOPOSIX
1933      if (posix || do_posix)      if (posix || do_posix)
1934        {        {
1935        int rc;        int rc;
1936        int eflags = 0;        int eflags = 0;
1937        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1938          if (use_size_offsets > 0)
1939            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1940        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1941        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1942    
1943        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1944    
1945        if (rc != 0)        if (rc != 0)
1946          {          {
1947          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1948          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1949          }          }
1950          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1951                  != 0)
1952            {
1953            fprintf(outfile, "Matched with REG_NOSUB\n");
1954            }
1955        else        else
1956          {          {
1957          size_t i;          size_t i;
1958          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1959            {            {
1960            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1961              {              {
1962              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1963              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1964                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1965              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1966                if (i == 0 && do_showrest)
1967                  {
1968                  fprintf(outfile, " 0+ ");
1969                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1970                    outfile);
1971                  fprintf(outfile, "\n");
1972                  }
1973              }              }
1974            }            }
1975          }          }
1976          free(pmatch);
1977        }        }
1978    
1979      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1980    
1981      else      else
1982    #endif  /* !defined NOPOSIX */
1983    
1984        for (;; gmatched++)    /* Loop for /g or /G */
1985        {        {
1986        if (timeit)        if (timeitm > 0)
1987          {          {
1988          register int i;          register int i;
1989          clock_t time_taken;          clock_t time_taken;
1990          clock_t start_time = clock();          clock_t start_time = clock();
1991          for (i = 0; i < LOOPREPEAT; i++)  
1992            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
1993              size_offsets);          if (all_use_dfa || use_dfa)
1994              {
1995              int workspace[1000];
1996              for (i = 0; i < timeitm; i++)
1997                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1998                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1999                  sizeof(workspace)/sizeof(int));
2000              }
2001            else
2002    #endif
2003    
2004            for (i = 0; i < timeitm; i++)
2005              count = pcre_exec(re, extra, (char *)bptr, len,
2006                start_offset, options | g_notempty, use_offsets, use_size_offsets);
2007    
2008          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2009          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2010            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)timeitm) /
2011            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
2012            }
2013    
2014          /* If find_match_limit is set, we want to do repeated matches with
2015          varying limits in order to find the minimum value for the match limit and
2016          for the recursion limit. */
2017    
2018          if (find_match_limit)
2019            {
2020            if (extra == NULL)
2021              {
2022              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2023              extra->flags = 0;
2024              }
2025    
2026            (void)check_match_limit(re, extra, bptr, len, start_offset,
2027              options|g_notempty, use_offsets, use_size_offsets,
2028              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2029              PCRE_ERROR_MATCHLIMIT, "match()");
2030    
2031            count = check_match_limit(re, extra, bptr, len, start_offset,
2032              options|g_notempty, use_offsets, use_size_offsets,
2033              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2034              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2035          }          }
2036    
2037        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If callout_data is set, use the interface with additional data */
2038          size_offsets);  
2039          else if (callout_data_set)
2040            {
2041            if (extra == NULL)
2042              {
2043              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2044              extra->flags = 0;
2045              }
2046            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2047            extra->callout_data = &callout_data;
2048            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2049              options | g_notempty, use_offsets, use_size_offsets);
2050            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2051            }
2052    
2053          /* The normal case is just to do the match once, with the default
2054          value of match_limit. */
2055    
2056    #if !defined NODFA
2057          else if (all_use_dfa || use_dfa)
2058            {
2059            int workspace[1000];
2060            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2061              options | g_notempty, use_offsets, use_size_offsets, workspace,
2062              sizeof(workspace)/sizeof(int));
2063            if (count == 0)
2064              {
2065              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2066              count = use_size_offsets/2;
2067              }
2068            }
2069    #endif
2070    
2071        if (count == 0)        else
2072          {          {
2073          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2074          count = size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2075            if (count == 0)
2076              {
2077              fprintf(outfile, "Matched, but too many substrings\n");
2078              count = use_size_offsets/3;
2079              }
2080          }          }
2081    
2082          /* Matched */
2083    
2084        if (count >= 0)        if (count >= 0)
2085          {          {
2086          int i;          int i, maxcount;
2087    
2088    #if !defined NODFA
2089            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2090    #endif
2091              maxcount = use_size_offsets/3;
2092    
2093            /* This is a check against a lunatic return value. */
2094    
2095            if (count > maxcount)
2096              {
2097              fprintf(outfile,
2098                "** PCRE error: returned count %d is too big for offset size %d\n",
2099                count, use_size_offsets);
2100              count = use_size_offsets/3;
2101              if (do_g || do_G)
2102                {
2103                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2104                do_g = do_G = FALSE;        /* Break g/G loop */
2105                }
2106              }
2107    
2108          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2109            {            {
2110            if (offsets[i] < 0)            if (use_offsets[i] < 0)
2111              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
2112            else            else
2113              {              {
2114              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
2115              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
2116                  use_offsets[i+1] - use_offsets[i], outfile);
2117              fprintf(outfile, "\n");              fprintf(outfile, "\n");
2118                if (i == 0)
2119                  {
2120                  if (do_showrest)
2121                    {
2122                    fprintf(outfile, " 0+ ");
2123                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2124                      outfile);
2125                    fprintf(outfile, "\n");
2126                    }
2127                  }
2128              }              }
2129            }            }
2130    
# Line 839  while (!done) Line 2132  while (!done)
2132            {            {
2133            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2134              {              {
2135              char buffer[16];              char copybuffer[256];
2136              int rc = pcre_copy_substring((char *)dbuffer, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2137                i, buffer, sizeof(buffer));                i, copybuffer, sizeof(copybuffer));
2138              if (rc < 0)              if (rc < 0)
2139                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2140              else              else
2141                fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2142              }              }
2143            }            }
2144    
2145            for (copynamesptr = copynames;
2146                 *copynamesptr != 0;
2147                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2148              {
2149              char copybuffer[256];
2150              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2151                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2152              if (rc < 0)
2153                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2154              else
2155                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2156              }
2157    
2158          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2159            {            {
2160            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
2161              {              {
2162              const char *substring;              const char *substring;
2163              int rc = pcre_get_substring((char *)dbuffer, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2164                i, &substring);                i, &substring);
2165              if (rc < 0)              if (rc < 0)
2166                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
2167              else              else
2168                {                {
2169                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2170                free((void *)substring);                pcre_free_substring(substring);
2171                }                }
2172              }              }
2173            }            }
2174    
2175            for (getnamesptr = getnames;
2176                 *getnamesptr != 0;
2177                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2178              {
2179              const char *substring;
2180              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2181                count, (char *)getnamesptr, &substring);
2182              if (rc < 0)
2183                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2184              else
2185                {
2186                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2187                pcre_free_substring(substring);
2188                }
2189              }
2190    
2191          if (getlist)          if (getlist)
2192            {            {
2193            const char **stringlist;            const char **stringlist;
2194            int rc = pcre_get_substring_list((char *)dbuffer, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2195              &stringlist);              &stringlist);
2196            if (rc < 0)            if (rc < 0)
2197              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 879  while (!done) Line 2201  while (!done)
2201                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2202              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
2203                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
2204              free((void *)stringlist);              /* free((void *)stringlist); */
2205                pcre_free_substring_list(stringlist);
2206              }              }
2207            }            }
2208            }
2209    
2210          /* There was a partial match */
2211    
2212          else if (count == PCRE_ERROR_PARTIAL)
2213            {
2214            fprintf(outfile, "Partial match");
2215    #if !defined NODFA
2216            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2217              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2218                bptr + use_offsets[0]);
2219    #endif
2220            fprintf(outfile, "\n");
2221            break;  /* Out of the /g loop */
2222          }          }
2223    
2224          /* Failed to match. If this is a /g or /G loop and we previously set
2225          g_notempty after a null match, this is not necessarily the end. We want
2226          to advance the start offset, and continue. We won't be at the end of the
2227          string - that was checked before setting g_notempty.
2228    
2229          Complication arises in the case when the newline option is "any" or
2230          "anycrlf". If the previous match was at the end of a line terminated by
2231          CRLF, an advance of one character just passes the \r, whereas we should
2232          prefer the longer newline sequence, as does the code in pcre_exec().
2233          Fudge the offset value to achieve this.
2234    
2235          Otherwise, in the case of UTF-8 matching, the advance must be one
2236          character, not one byte. */
2237    
2238        else        else
2239          {          {
2240          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2241              {
2242              int onechar = 1;
2243              unsigned int obits = ((real_pcre *)re)->options;
2244              use_offsets[0] = start_offset;
2245              if ((obits & PCRE_NEWLINE_BITS) == 0)
2246                {
2247                int d;
2248                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2249                obits = (d == '\r')? PCRE_NEWLINE_CR :
2250                        (d == '\n')? PCRE_NEWLINE_LF :
2251                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2252                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2253                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2254                }
2255              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2256                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2257                  &&
2258                  start_offset < len - 1 &&
2259                  bptr[start_offset] == '\r' &&
2260                  bptr[start_offset+1] == '\n')
2261                onechar++;
2262              else if (use_utf8)
2263                {
2264                while (start_offset + onechar < len)
2265                  {
2266                  int tb = bptr[start_offset+onechar];
2267                  if (tb <= 127) break;
2268                  tb &= 0xc0;
2269                  if (tb != 0 && tb != 0xc0) onechar++;
2270                  }
2271                }
2272              use_offsets[1] = start_offset + onechar;
2273              }
2274            else
2275              {
2276              if (count == PCRE_ERROR_NOMATCH)
2277                {
2278                if (gmatched == 0) fprintf(outfile, "No match\n");
2279                }
2280            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2281              break;  /* Out of the /g loop */
2282              }
2283          }          }
2284        }  
2285      }        /* If not /g or /G we are done */
2286    
2287          if (!do_g && !do_G) break;
2288    
2289          /* If we have matched an empty string, first check to see if we are at
2290          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2291          what Perl's /g option does. This turns out to be rather cunning. First
2292          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2293          same point. If this fails (picked up above) we advance to the next
2294          character. */
2295    
2296          g_notempty = 0;
2297    
2298          if (use_offsets[0] == use_offsets[1])
2299            {
2300            if (use_offsets[0] == len) break;
2301            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2302            }
2303    
2304          /* For /g, update the start offset, leaving the rest alone */
2305    
2306          if (do_g) start_offset = use_offsets[1];
2307    
2308          /* For /G, update the pointer and length */
2309    
2310          else
2311            {
2312            bptr += use_offsets[1];
2313            len -= use_offsets[1];
2314            }
2315          }  /* End of loop for /g and /G */
2316    
2317        NEXT_DATA: continue;
2318        }    /* End of loop for data lines */
2319    
2320    CONTINUE:    CONTINUE:
2321    
2322    #if !defined NOPOSIX
2323    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2324    if (re != NULL) free(re);  #endif
2325    if (extra != NULL) free(extra);  
2326      if (re != NULL) new_free(re);
2327      if (extra != NULL) new_free(extra);
2328    if (tables != NULL)    if (tables != NULL)
2329      {      {
2330      free((void *)tables);      new_free((void *)tables);
2331      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2332        locale_set = 0;
2333      }      }
2334    }    }
2335    
2336  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2337  return 0;  
2338    EXIT:
2339    
2340    if (infile != NULL && infile != stdin) fclose(infile);
2341    if (outfile != NULL && outfile != stdout) fclose(outfile);
2342    
2343    free(buffer);
2344    free(dbuffer);
2345    free(pbuffer);
2346    free(offsets);
2347    
2348    return yield;
2349  }  }
2350    
2351  /* End */  /* End of pcretest.c */

Legend:
Removed from v.31  
changed lines
  Added in v.200

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12