/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC revision 123 by ph10, Mon Mar 12 15:19:06 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 42  been extended and consequently is now ra
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    
48    /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
69    
70  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
71  #include "internal.h"  
72    /* We include pcre_internal.h because we need the internal info for displaying
73    the results of pcre_study() and we also need to know about the internal
74    macros, structures, and other internal data values; pcretest has "inside
75    information" compared to a program that strictly follows the PCRE API. */
76    
77    #include "pcre_internal.h"
78    
79    /* We need access to the data tables that PCRE uses. So as not to have to keep
80    two copies, we include the source file here, changing the names of the external
81    symbols to prevent clashes. */
82    
83    #define _pcre_utf8_table1      utf8_table1
84    #define _pcre_utf8_table1_size utf8_table1_size
85    #define _pcre_utf8_table2      utf8_table2
86    #define _pcre_utf8_table3      utf8_table3
87    #define _pcre_utf8_table4      utf8_table4
88    #define _pcre_utt              utt
89    #define _pcre_utt_size         utt_size
90    #define _pcre_OP_lengths       OP_lengths
91    
92    #include "pcre_tables.c"
93    
94    /* We also need the pcre_printint() function for printing out compiled
95    patterns. This function is in a separate file so that it can be included in
96    pcre_compile.c when that module is compiled with debugging enabled.
97    
98    The definition of the macro PRINTABLE, which determines whether to print an
99    output character as-is or as a hex value when showing compiled patterns, is
100    contained in this file. We use it here also, in cases when the locale has not
101    been explicitly changed, so as to get consistent output from systems that
102    differ in their output from isprint() even in the "C" locale. */
103    
104    #include "pcre_printint.src"
105    
106    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107    
108    
109  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
110  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 114  Makefile. */
114  #include "pcreposix.h"  #include "pcreposix.h"
115  #endif  #endif
116    
117    /* It is also possible, for the benefit of the version currently imported into
118    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
119    interface to the DFA matcher (NODFA), and without the doublecheck of the old
120    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
121    UTF8 support if PCRE is built without it. */
122    
123    #ifndef SUPPORT_UTF8
124    #ifndef NOUTF8
125    #define NOUTF8
126    #endif
127    #endif
128    
129    
130    /* Other parameters */
131    
132  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
133  #ifdef CLK_TCK  #ifdef CLK_TCK
134  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 137  Makefile. */
137  #endif  #endif
138  #endif  #endif
139    
140  #define LOOPREPEAT 50000  /* This is the default loop count for timing. */
141    
142  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define DBUFFER_SIZE BUFFER_SIZE  
143    
144    /* Static variables */
145    
146  static FILE *outfile;  static FILE *outfile;
147  static int log_store = 0;  static int log_store = 0;
# Line 48  static int callout_extra; Line 150  static int callout_extra;
150  static int callout_fail_count;  static int callout_fail_count;
151  static int callout_fail_id;  static int callout_fail_id;
152  static int first_callout;  static int first_callout;
153    static int locale_set = 0;
154  static int show_malloc;  static int show_malloc;
155  static int use_utf8;  static int use_utf8;
156  static size_t gotten_store;  static size_t gotten_store;
157    
158    /* The buffers grow automatically if very long input lines are encountered. */
159    
160  static const int utf8_table1[] = {  static int buffer_size = 50000;
161    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  static uschar *buffer = NULL;
162    static uschar *dbuffer = NULL;
163  static const int utf8_table2[] = {  static uschar *pbuffer = NULL;
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static const int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
164    
165    
166    
167  /*************************************************  /*************************************************
168  *         Print compiled regex                   *  *        Read or extend an input line            *
169  *************************************************/  *************************************************/
170    
171  /* The code for doing this is held in a separate file that is also included in  /* Input lines are read into buffer, but both patterns and data lines can be
172  pcre.c when it is compiled with the debug switch. It defines a function called  continued over multiple input lines. In addition, if the buffer fills up, we
173  print_internals(), which uses a table of opcode lengths defined by the macro  want to automatically expand it so as to be able to handle extremely large
174  OP_LENGTHS, whose name must be OP_lengths. */  lines that are needed for certain stress tests. When the input buffer is
175    expanded, the other two buffers must also be expanded likewise, and the
176    contents of pbuffer, which are a copy of the input for callouts, must be
177    preserved (for when expansion happens for a data line). This is not the most
178    efficient way of handling this, but hey, this is just a test program!
179    
180    Arguments:
181      f            the file to read
182      start        where in buffer to start (this *must* be within buffer)
183    
184    Returns:       pointer to the start of new data
185                   could be a copy of start, or could be moved
186                   NULL if no data read and EOF reached
187    */
188    
189    static uschar *
190    extend_inputline(FILE *f, uschar *start)
191    {
192    uschar *here = start;
193    
194    for (;;)
195      {
196      int rlen = buffer_size - (here - buffer);
197    
198      if (rlen > 1000)
199        {
200        int dlen;
201        if (fgets((char *)here, rlen,  f) == NULL)
202          return (here == start)? NULL : start;
203        dlen = (int)strlen((char *)here);
204        if (dlen > 0 && here[dlen - 1] == '\n') return start;
205        here += dlen;
206        }
207    
208      else
209        {
210        int new_buffer_size = 2*buffer_size;
211        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
212        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
213        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
214    
215        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
216          {
217          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
218          exit(1);
219          }
220    
221        memcpy(new_buffer, buffer, buffer_size);
222        memcpy(new_pbuffer, pbuffer, buffer_size);
223    
224        buffer_size = new_buffer_size;
225    
226        start = new_buffer + (start - buffer);
227        here = new_buffer + (here - buffer);
228    
229        free(buffer);
230        free(dbuffer);
231        free(pbuffer);
232    
233        buffer = new_buffer;
234        dbuffer = new_dbuffer;
235        pbuffer = new_pbuffer;
236        }
237      }
238    
239    return NULL;  /* Control never gets here */
240    }
241    
242    
243    
 static uschar OP_lengths[] = { OP_LENGTHS };  
244    
 #include "printint.c"  
245    
246    
247    
# Line 85  static uschar OP_lengths[] = { OP_LENGTH Line 251  static uschar OP_lengths[] = { OP_LENGTH
251    
252  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
253  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
254  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
255    
256  Arguments:  Arguments:
257    str           string to be converted    str           string to be converted
# Line 106  return(result); Line 272  return(result);
272    
273    
274    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
275    
276  /*************************************************  /*************************************************
277  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 151  return i + 1; Line 281  return i + 1;
281  and returns the value of the character.  and returns the value of the character.
282    
283  Argument:  Argument:
284    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
285    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
286    
287  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
288             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
289  */  */
290    
291    #if !defined NOUTF8
292    
293  static int  static int
294  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
295  {  {
296  int c = *buffer++;  int c = *utf8bytes++;
297  int d = c;  int d = c;
298  int i, j, s;  int i, j, s;
299    
# Line 181  d = (c & utf8_table3[i]) << s; Line 313  d = (c & utf8_table3[i]) << s;
313    
314  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
315    {    {
316    c = *buffer++;    c = *utf8bytes++;
317    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
318    s -= 6;    s -= 6;
319    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 189  for (j = 0; j < i; j++) Line 321  for (j = 0; j < i; j++)
321    
322  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
323    
324  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
325    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
326  if (j != i) return -(i+1);  if (j != i) return -(i+1);
327    
# Line 199  if (j != i) return -(i+1); Line 331  if (j != i) return -(i+1);
331  return i+1;  return i+1;
332  }  }
333    
334    #endif
335    
336    
337    
338    /*************************************************
339    *       Convert character value to UTF-8         *
340    *************************************************/
341    
342    /* This function takes an integer value in the range 0 - 0x7fffffff
343    and encodes it as a UTF-8 character in 1 to 6 bytes.
344    
345    Arguments:
346      cvalue     the character value
347      utf8bytes  pointer to buffer for result - at least 6 bytes long
348    
349    Returns:     number of bytes placed in the buffer
350    */
351    
352    #if !defined NOUTF8
353    
354    static int
355    ord2utf8(int cvalue, uschar *utf8bytes)
356    {
357    register int i, j;
358    for (i = 0; i < utf8_table1_size; i++)
359      if (cvalue <= utf8_table1[i]) break;
360    utf8bytes += i;
361    for (j = i; j > 0; j--)
362     {
363     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
364     cvalue >>= 6;
365     }
366    *utf8bytes = utf8_table2[i] | cvalue;
367    return i + 1;
368    }
369    
370    #endif
371    
372    
373    
374  /*************************************************  /*************************************************
# Line 211  chars without printing. */ Line 381  chars without printing. */
381    
382  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
383  {  {
384  int c;  int c = 0;
385  int yield = 0;  int yield = 0;
386    
387  while (length-- > 0)  while (length-- > 0)
388    {    {
389    #if !defined NOUTF8
390    if (use_utf8)    if (use_utf8)
391      {      {
392      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 224  while (length-- > 0) Line 395  while (length-- > 0)
395        {        {
396        length -= rc - 1;        length -= rc - 1;
397        p += rc;        p += rc;
398        if (c < 256 && isprint(c))        if (PRINTHEX(c))
399          {          {
400          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
401          yield++;          yield++;
402          }          }
403        else        else
404          {          {
405          int n;          int n = 4;
406          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
407          yield += n;          yield += (n <= 0x000000ff)? 2 :
408                     (n <= 0x00000fff)? 3 :
409                     (n <= 0x0000ffff)? 4 :
410                     (n <= 0x000fffff)? 5 : 6;
411          }          }
412        continue;        continue;
413        }        }
414      }      }
415    #endif
416    
417     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
418    
419    if (isprint(c = *(p++)))    c = *p++;
420      if (PRINTHEX(c))
421      {      {
422      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
423      yield++;      yield++;
# Line 269  data is not zero. */ Line 445  data is not zero. */
445  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
446  {  {
447  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
448  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
449    
450  if (callout_extra)  if (callout_extra)
451    {    {
# Line 300  pre_start = pchars((unsigned char *)cb-> Line 476  pre_start = pchars((unsigned char *)cb->
476  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
477    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
478    
479    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
480    
481  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
482    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
483    
484  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
485    
486  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
487  shown */  shown. For automatic callouts, show the pattern offset. */
488    
489  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
490    else fprintf(outfile, "%3d ", cb->callout_number);    {
491      fprintf(outfile, "%+3d ", cb->pattern_position);
492      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
493      }
494    else
495      {
496      if (callout_extra) fprintf(outfile, "    ");
497        else fprintf(outfile, "%3d ", cb->callout_number);
498      }
499    
500  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
501  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 320  if (post_start > 0) Line 506  if (post_start > 0)
506    fprintf(outfile, "^");    fprintf(outfile, "^");
507    }    }
508    
509    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
510      fprintf(outfile, " ");
511    
512    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
513      pbuffer + cb->pattern_position);
514    
515  fprintf(outfile, "\n");  fprintf(outfile, "\n");
516  first_callout = 0;  first_callout = 0;
517    
# Line 350  static void *new_malloc(size_t size) Line 542  static void *new_malloc(size_t size)
542  void *block = malloc(size);  void *block = malloc(size);
543  gotten_store = size;  gotten_store = size;
544  if (show_malloc)  if (show_malloc)
545    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
546  return block;  return block;
547  }  }
548    
# Line 368  static void *stack_malloc(size_t size) Line 560  static void *stack_malloc(size_t size)
560  {  {
561  void *block = malloc(size);  void *block = malloc(size);
562  if (show_malloc)  if (show_malloc)
563    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
564  return block;  return block;
565  }  }
566    
# Line 396  if ((rc = pcre_fullinfo(re, study, optio Line 588  if ((rc = pcre_fullinfo(re, study, optio
588    
589    
590  /*************************************************  /*************************************************
591    *         Byte flipping function                 *
592    *************************************************/
593    
/* Reverse the byte order of the low-order bytes of value. If n is 2, the two
lowest bytes are swapped; for any other n the four lowest bytes are reversed.
Bits above those bytes do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
603    
604    
605    
606    
607    /*************************************************
608    *        Check match or recursion limit          *
609    *************************************************/
610    
611    static int
612    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
613      int start_offset, int options, int *use_offsets, int use_size_offsets,
614      int flag, unsigned long int *limit, int errnumber, const char *msg)
615    {
616    int count;
617    int min = 0;
618    int mid = 64;
619    int max = -1;
620    
621    extra->flags |= flag;
622    
623    for (;;)
624      {
625      *limit = mid;
626    
627      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
628        use_offsets, use_size_offsets);
629    
630      if (count == errnumber)
631        {
632        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
633        min = mid;
634        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
635        }
636    
637      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
638                             count == PCRE_ERROR_PARTIAL)
639        {
640        if (mid == min + 1)
641          {
642          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
643          break;
644          }
645        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
646        max = mid;
647        mid = (min + mid)/2;
648        }
649      else break;    /* Some other error */
650      }
651    
652    extra->flags &= ~flag;
653    return count;
654    }
655    
656    
657    
658    /*************************************************
659    *         Check newline indicator                *
660    *************************************************/
661    
662    /* This is used both at compile and run-time to check for <xxx> escapes, where
663    xxx is lf, cr, crlf, or any (lower case). Print a message and return 0 if there is no match.
664    
665    Arguments:
666      p           points after the leading '<'
667      f           file for error message
668    
669    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
670    */
671    
672    static int
673    check_newline(uschar *p, FILE *f)
674    {
675    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
676    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
677    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
678    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
679    fprintf(f, "Unknown newline type at: <%s\n", p);
680    return 0;
681    }
682    
683    
684    
685    /*************************************************
686    *             Usage function                     *
687    *************************************************/
688    
/* Write a summary of the command line options to the standard output. The
-dfa line is left out when NODFA is defined, and the -p line is left out when
NOPOSIX is defined. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
714    
715    
716    
717    /*************************************************
718  *                Main Program                    *  *                Main Program                    *
719  *************************************************/  *************************************************/
720    
# Line 410  int options = 0; Line 729  int options = 0;
729  int study_options = 0;  int study_options = 0;
730  int op = 1;  int op = 1;
731  int timeit = 0;  int timeit = 0;
732    int timeitm = 0;
733  int showinfo = 0;  int showinfo = 0;
734  int showstore = 0;  int showstore = 0;
735    int quiet = 0;
736  int size_offsets = 45;  int size_offsets = 45;
737  int size_offsets_max;  int size_offsets_max;
738  int *offsets;  int *offsets = NULL;
739  #if !defined NOPOSIX  #if !defined NOPOSIX
740  int posix = 0;  int posix = 0;
741  #endif  #endif
742  int debug = 0;  int debug = 0;
743  int done = 0;  int done = 0;
744    int all_use_dfa = 0;
745    int yield = 0;
746    int stack_size;
747    
748    /* These vectors store, end-to-end, a list of captured substring names. Assume
749    that 1024 is plenty long enough for the few names we'll be testing. */
750    
751    uschar copynames[1024];
752    uschar getnames[1024];
753    
754  unsigned char *buffer;  uschar *copynamesptr;
755  unsigned char *dbuffer;  uschar *getnamesptr;
756    
757  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
758  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
759    
760  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
761  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
762    pbuffer = (unsigned char *)malloc(buffer_size);
763    
764  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
765    
766  outfile = stdout;  outfile = stdout;
767    
768    /* The following  _setmode() stuff is some Windows magic that tells its runtime
769    library to translate CRLF into a single LF character. At least, that's what
770    I've been told: never having used Windows I take this all on trust. Originally
771    it set 0x8000, but then I was advised that _O_BINARY was better. */
772    
773    #if defined(_WIN32) || defined(WIN32)
774    _setmode( _fileno( stdout ), _O_BINARY );
775    #endif
776    
777  /* Scan options */  /* Scan options */
778    
779  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 442  while (argc > 1 && argv[op][0] == '-') Line 782  while (argc > 1 && argv[op][0] == '-')
782    
783    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
784      showstore = 1;      showstore = 1;
785    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
786      else if (strcmp(argv[op], "-b") == 0) debug = 1;
787    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
788    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
789    #if !defined NODFA
790      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
791    #endif
792    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
793        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
794          *endptr == 0))          *endptr == 0))
# Line 452  while (argc > 1 && argv[op][0] == '-') Line 796  while (argc > 1 && argv[op][0] == '-')
796      op++;      op++;
797      argc--;      argc--;
798      }      }
799      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
800        {
801        int both = argv[op][2] == 0;
802        int temp;
803        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
804                         *endptr == 0))
805          {
806          timeitm = temp;
807          op++;
808          argc--;
809          }
810        else timeitm = LOOPREPEAT;
811        if (both) timeit = timeitm;
812        }
813      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
814          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
815            *endptr == 0))
816        {
817    #if defined(_WIN32) || defined(WIN32)
818        printf("PCRE: -S not supported on this OS\n");
819        exit(1);
820    #else
821        int rc;
822        struct rlimit rlim;
823        getrlimit(RLIMIT_STACK, &rlim);
824        rlim.rlim_cur = stack_size * 1024 * 1024;
825        rc = setrlimit(RLIMIT_STACK, &rlim);
826        if (rc != 0)
827          {
828        printf("PCRE: setrlimit() failed with error %d\n", rc);
829        exit(1);
830          }
831        op++;
832        argc--;
833    #endif
834        }
835  #if !defined NOPOSIX  #if !defined NOPOSIX
836    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
837  #endif  #endif
# Line 462  while (argc > 1 && argv[op][0] == '-') Line 842  while (argc > 1 && argv[op][0] == '-')
842      printf("Compiled with\n");      printf("Compiled with\n");
843      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
844      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
845        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
846        printf("  %sUnicode properties support\n", rc? "" : "No ");
847      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
848      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
849          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
850          (rc == -1)? "ANY" : "???");
851      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
852      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
853      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
854      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
855      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
856      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
857        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
858        printf("  Default recursion depth limit = %d\n", rc);
859      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
860      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
861      exit(0);      goto EXIT;
862        }
863      else if (strcmp(argv[op], "-help") == 0 ||
864               strcmp(argv[op], "--help") == 0)
865        {
866        usage();
867        goto EXIT;
868      }      }
869    else    else
870      {      {
871      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
872      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
873      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
874      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
875      }      }
876    op++;    op++;
877    argc--;    argc--;
# Line 501  if (offsets == NULL) Line 885  if (offsets == NULL)
885    {    {
886    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
887      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
888    return 1;    yield = 1;
889      goto EXIT;
890    }    }
891    
892  /* Sort out the input and output files */  /* Sort out the input and output files */
893    
894  if (argc > 1)  if (argc > 1)
895    {    {
896    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
897    if (infile == NULL)    if (infile == NULL)
898      {      {
899      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
900      return 1;      yield = 1;
901        goto EXIT;
902      }      }
903    }    }
904    
905  if (argc > 2)  if (argc > 2)
906    {    {
907    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
908    if (outfile == NULL)    if (outfile == NULL)
909      {      {
910      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
911      return 1;      yield = 1;
912        goto EXIT;
913      }      }
914    }    }
915    
# Line 533  pcre_free = new_free; Line 920  pcre_free = new_free;
920  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
921  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
922    
923  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
924    
925  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
926    
927  /* Main loop */  /* Main loop */
928    
# Line 551  while (!done) Line 938  while (!done)
938    
939    const char *error;    const char *error;
940    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
941      unsigned char *to_file = NULL;
942    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
943      unsigned long int true_size, true_study_size = 0;
944      size_t size, regex_gotten_store;
945    int do_study = 0;    int do_study = 0;
946    int do_debug = debug;    int do_debug = debug;
947      int debug_lengths = 1;
948    int do_G = 0;    int do_G = 0;
949    int do_g = 0;    int do_g = 0;
950    int do_showinfo = showinfo;    int do_showinfo = showinfo;
951    int do_showrest = 0;    int do_showrest = 0;
952    int erroroffset, len, delimiter;    int do_flip = 0;
953      int erroroffset, len, delimiter, poffset;
954    
955    use_utf8 = 0;    use_utf8 = 0;
956    
957    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
958    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
959    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
960    fflush(outfile);    fflush(outfile);
961    
# Line 571  while (!done) Line 963  while (!done)
963    while (isspace(*p)) p++;    while (isspace(*p)) p++;
964    if (*p == 0) continue;    if (*p == 0) continue;
965    
966    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
967    complete, read more. */  
968      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
969        {
970        unsigned long int magic, get_options;
971        uschar sbuf[8];
972        FILE *f;
973    
974        p++;
975        pp = p + (int)strlen((char *)p);
976        while (isspace(pp[-1])) pp--;
977        *pp = 0;
978    
979        f = fopen((char *)p, "rb");
980        if (f == NULL)
981          {
982          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
983          continue;
984          }
985    
986        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
987    
988        true_size =
989          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
990        true_study_size =
991          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
992    
993        re = (real_pcre *)new_malloc(true_size);
994        regex_gotten_store = gotten_store;
995    
996        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
997    
998        magic = ((real_pcre *)re)->magic_number;
999        if (magic != MAGIC_NUMBER)
1000          {
1001          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1002            {
1003            do_flip = 1;
1004            }
1005          else
1006            {
1007            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1008            fclose(f);
1009            continue;
1010            }
1011          }
1012    
1013        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1014          do_flip? " (byte-inverted)" : "", p);
1015    
1016        /* Need to know if UTF-8 for printing data strings */
1017    
1018        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1019        use_utf8 = (get_options & PCRE_UTF8) != 0;
1020    
1021        /* Now see if there is any following study data */
1022    
1023        if (true_study_size != 0)
1024          {
1025          pcre_study_data *psd;
1026    
1027          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1028          extra->flags = PCRE_EXTRA_STUDY_DATA;
1029    
1030          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1031          extra->study_data = psd;
1032    
1033          if (fread(psd, 1, true_study_size, f) != true_study_size)
1034            {
1035            FAIL_READ:
1036            fprintf(outfile, "Failed to read data from %s\n", p);
1037            if (extra != NULL) new_free(extra);
1038            if (re != NULL) new_free(re);
1039            fclose(f);
1040            continue;
1041            }
1042          fprintf(outfile, "Study data loaded from %s\n", p);
1043          do_study = 1;     /* To get the data output if requested */
1044          }
1045        else fprintf(outfile, "No study data\n");
1046    
1047        fclose(f);
1048        goto SHOW_INFO;
1049        }
1050    
1051      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1052      the pattern; if it isn't complete, read more. */
1053    
1054    delimiter = *p++;    delimiter = *p++;
1055    
# Line 583  while (!done) Line 1060  while (!done)
1060      }      }
1061    
1062    pp = p;    pp = p;
1063      poffset = p - buffer;
1064    
1065    for(;;)    for(;;)
1066      {      {
# Line 593  while (!done) Line 1071  while (!done)
1071        pp++;        pp++;
1072        }        }
1073      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1074      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1075      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1076        {        {
1077        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1078        done = 1;        done = 1;
# Line 611  while (!done) Line 1081  while (!done)
1081      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1082      }      }
1083    
1084      /* The buffer may have moved while being extended; reset the start of data
1085      pointer to the correct relative point in the buffer. */
1086    
1087      p = buffer + poffset;
1088    
1089    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1090    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1091    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1092    
1093    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1094    
1095    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1096      for callouts. */
1097    
1098    *pp++ = 0;    *pp++ = 0;
1099      strcpy((char *)pbuffer, (char *)p);
1100    
1101    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1102    
# Line 631  while (!done) Line 1108  while (!done)
1108      {      {
1109      switch (*pp++)      switch (*pp++)
1110        {        {
1111          case 'f': options |= PCRE_FIRSTLINE; break;
1112        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1113        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1114        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 639  while (!done) Line 1117  while (!done)
1117    
1118        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1119        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1120          case 'B': do_debug = 1; break;
1121          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1122        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1123        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1124          case 'F': do_flip = 1; break;
1125        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1126        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1127          case 'J': options |= PCRE_DUPNAMES; break;
1128        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1129        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1130    
# Line 653  while (!done) Line 1135  while (!done)
1135        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1136        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1137        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1138          case 'Z': debug_lengths = 0;
1139        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1140        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1141    
1142        case 'L':        case 'L':
1143        ppp = pp;        ppp = pp;
1144        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1145          /* The 0 (NUL) test is just in case this is an unterminated line. */
1146          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1147        *ppp = 0;        *ppp = 0;
1148        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1149          {          {
1150          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1151          goto SKIP_DATA;          goto SKIP_DATA;
1152          }          }
1153          locale_set = 1;
1154        tables = pcre_maketables();        tables = pcre_maketables();
1155        pp = ppp;        pp = ppp;
1156        break;        break;
1157    
1158        case '\n': case ' ': break;        case '>':
1159          to_file = pp;
1160          while (*pp != 0) pp++;
1161          while (isspace(pp[-1])) pp--;
1162          *pp = 0;
1163          break;
1164    
1165          case '<':
1166            {
1167            int x = check_newline(pp, outfile);
1168            if (x == 0) goto SKIP_DATA;
1169            options |= x;
1170            while (*pp++ != '>');
1171            }
1172          break;
1173    
1174          case '\r':                      /* So that it works in Windows */
1175          case '\n':
1176          case ' ':
1177          break;
1178    
1179        default:        default:
1180        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1181        goto SKIP_DATA;        goto SKIP_DATA;
# Line 685  while (!done) Line 1191  while (!done)
1191      {      {
1192      int rc;      int rc;
1193      int cflags = 0;      int cflags = 0;
1194    
1195      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1196      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1197        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1198        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1199        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1200    
1201      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1202    
1203      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 694  while (!done) Line 1205  while (!done)
1205    
1206      if (rc != 0)      if (rc != 0)
1207        {        {
1208        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1209        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1210        goto SKIP_DATA;        goto SKIP_DATA;
1211        }        }
# Line 706  while (!done) Line 1217  while (!done)
1217  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1218    
1219      {      {
1220      if (timeit)      if (timeit > 0)
1221        {        {
1222        register int i;        register int i;
1223        clock_t time_taken;        clock_t time_taken;
1224        clock_t start_time = clock();        clock_t start_time = clock();
1225        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1226          {          {
1227          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1228          if (re != NULL) free(re);          if (re != NULL) free(re);
1229          }          }
1230        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1231        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1232          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1233            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1234        }        }
1235    
# Line 735  while (!done) Line 1246  while (!done)
1246          {          {
1247          for (;;)          for (;;)
1248            {            {
1249            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1250              {              {
1251              done = 1;              done = 1;
1252              goto CONTINUE;              goto CONTINUE;
# Line 759  while (!done) Line 1270  while (!done)
1270                sizeof(real_pcre) -                sizeof(real_pcre) -
1271                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1272    
1273        /* Extract the size for possible writing before possibly flipping it,
1274        and remember the store that was got. */
1275    
1276        true_size = ((real_pcre *)re)->size;
1277        regex_gotten_store = gotten_store;
1278    
1279        /* If /S was present, study the regexp to generate additional info to
1280        help with the matching. */
1281    
1282        if (do_study)
1283          {
1284          if (timeit > 0)
1285            {
1286            register int i;
1287            clock_t time_taken;
1288            clock_t start_time = clock();
1289            for (i = 0; i < timeit; i++)
1290              extra = pcre_study(re, study_options, &error);
1291            time_taken = clock() - start_time;
1292            if (extra != NULL) free(extra);
1293            fprintf(outfile, "  Study time %.4f milliseconds\n",
1294              (((double)time_taken * 1000.0) / (double)timeit) /
1295                (double)CLOCKS_PER_SEC);
1296            }
1297          extra = pcre_study(re, study_options, &error);
1298          if (error != NULL)
1299            fprintf(outfile, "Failed to study: %s\n", error);
1300          else if (extra != NULL)
1301            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1302          }
1303    
1304        /* If the 'F' option was present, we flip the bytes of all the integer
1305        fields in the regex data block and the study block. This is to make it
1306        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1307        compiled on a different architecture. */
1308    
1309        if (do_flip)
1310          {
1311          real_pcre *rre = (real_pcre *)re;
1312          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1313          rre->size = byteflip(rre->size, sizeof(rre->size));
1314          rre->options = byteflip(rre->options, sizeof(rre->options));
1315          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1316          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1317          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1318          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1319          rre->name_table_offset = byteflip(rre->name_table_offset,
1320            sizeof(rre->name_table_offset));
1321          rre->name_entry_size = byteflip(rre->name_entry_size,
1322            sizeof(rre->name_entry_size));
1323          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1324    
1325          if (extra != NULL)
1326            {
1327            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1328            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1329            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1330            }
1331          }
1332    
1333        /* Extract information from the compiled data if required */
1334    
1335        SHOW_INFO:
1336    
1337        if (do_debug)
1338          {
1339          fprintf(outfile, "------------------------------------------------------------------\n");
1340          pcre_printint(re, outfile, debug_lengths);
1341          }
1342    
1343      if (do_showinfo)      if (do_showinfo)
1344        {        {
1345        unsigned long int get_options;        unsigned long int get_options, all_options;
1346    #if !defined NOINFOCHECK
1347        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1348    #endif
1349        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1350        int nameentrysize, namecount;        int nameentrysize, namecount;
1351        const uschar *nametable;        const uschar *nametable;
       size_t size;  
   
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
1352    
1353        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1354        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
# Line 784  while (!done) Line 1360  while (!done)
1360        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1361        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1362    
1363    #if !defined NOINFOCHECK
1364        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1365        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1366          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 801  while (!done) Line 1378  while (!done)
1378            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1379              get_options, old_options);              get_options, old_options);
1380          }          }
1381    #endif
1382    
1383        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1384          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1385          size, gotten_store);          (int)size, (int)regex_gotten_store);
1386    
1387        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1388        if (backrefmax > 0)        if (backrefmax > 0)
# Line 822  while (!done) Line 1400  while (!done)
1400            }            }
1401          }          }
1402    
1403          /* The NOPARTIAL bit is a private bit in the options, so we have
1404          to fish it out via our back door */
1405    
1406          all_options = ((real_pcre *)re)->options;
1407          if (do_flip)
1408            {
1409            all_options = byteflip(all_options, sizeof(all_options));
1410             }
1411    
1412          if ((all_options & PCRE_NOPARTIAL) != 0)
1413            fprintf(outfile, "Partial matching not supported\n");
1414    
1415        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1416          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1417            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1418            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1419            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1420            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1421              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1422            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1423            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1424            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1425            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1426              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1427            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1428            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1429              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1430    
1431        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        switch (get_options & PCRE_NEWLINE_BITS)
1432          fprintf(outfile, "Case state changes\n");          {
1433            case PCRE_NEWLINE_CR:
1434            fprintf(outfile, "Forced newline sequence: CR\n");
1435            break;
1436    
1437            case PCRE_NEWLINE_LF:
1438            fprintf(outfile, "Forced newline sequence: LF\n");
1439            break;
1440    
1441            case PCRE_NEWLINE_CRLF:
1442            fprintf(outfile, "Forced newline sequence: CRLF\n");
1443            break;
1444    
1445            case PCRE_NEWLINE_ANY:
1446            fprintf(outfile, "Forced newline sequence: ANY\n");
1447            break;
1448    
1449            default:
1450            break;
1451            }
1452    
1453        if (first_char == -1)        if (first_char == -1)
1454          {          {
1455          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1456          }          }
1457        else if (first_char < 0)        else if (first_char < 0)
1458          {          {
# Line 851  while (!done) Line 1463  while (!done)
1463          int ch = first_char & 255;          int ch = first_char & 255;
1464          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1465            "" : " (caseless)";            "" : " (caseless)";
1466          if (isprint(ch))          if (PRINTHEX(ch))
1467            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1468          else          else
1469            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 866  while (!done) Line 1478  while (!done)
1478          int ch = need_char & 255;          int ch = need_char & 255;
1479          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1480            "" : " (caseless)";            "" : " (caseless)";
1481          if (isprint(ch))          if (PRINTHEX(ch))
1482            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1483          else          else
1484            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1485          }          }
       }  
   
     /* If /S was present, study the regexp to generate additional info to  
     help with the matching. */  
   
     if (do_study)  
       {  
       if (timeit)  
         {  
         register int i;  
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /  
             (double)CLOCKS_PER_SEC);  
         }  
   
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1486    
1487        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
1488        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
1489        so messes up the test suite. */        so messes up the test suite. (And with the /F option, it might be
1490          flipped.) */
1491    
1492        else if (do_showinfo)        if (do_study)
1493          {          {
1494          size_t size;          if (extra == NULL)
1495          uschar *start_bits = NULL;            fprintf(outfile, "Study returned NULL\n");
         new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);  
         new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);  
         /* fprintf(outfile, "Study size = %d\n", size); */  
         if (start_bits == NULL)  
           fprintf(outfile, "No starting character set\n");  
1496          else          else
1497            {            {
1498            int i;            uschar *start_bits = NULL;
1499            int c = 24;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1500            fprintf(outfile, "Starting character set: ");  
1501            for (i = 0; i < 256; i++)            if (start_bits == NULL)
1502                fprintf(outfile, "No starting byte set\n");
1503              else
1504              {              {
1505              if ((start_bits[i/8] & (1<<(i%8))) != 0)              int i;
1506                int c = 24;
1507                fprintf(outfile, "Starting byte set: ");
1508                for (i = 0; i < 256; i++)
1509                {                {
1510                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
                 {  
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
1511                  {                  {
1512                  fprintf(outfile, "%c ", i);                  if (c > 75)
1513                  c += 2;                    {
1514                  }                    fprintf(outfile, "\n  ");
1515                else                    c = 2;
1516                  {                    }
1517                  fprintf(outfile, "\\x%02x ", i);                  if (PRINTHEX(i) && i != ' ')
1518                  c += 5;                    {
1519                      fprintf(outfile, "%c ", i);
1520                      c += 2;
1521                      }
1522                    else
1523                      {
1524                      fprintf(outfile, "\\x%02x ", i);
1525                      c += 5;
1526                      }
1527                  }                  }
1528                }                }
1529                fprintf(outfile, "\n");
1530              }              }
           fprintf(outfile, "\n");  
1531            }            }
1532          }          }
1533        }        }
1534      }  
1535        /* If the '>' option was present, we write out the regex to a file, and
1536        that is all. The first 8 bytes of the file are the regex length and then
1537        the study length, in big-endian order. */
1538    
1539        if (to_file != NULL)
1540          {
1541          FILE *f = fopen((char *)to_file, "wb");
1542          if (f == NULL)
1543            {
1544            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1545            }
1546          else
1547            {
1548            uschar sbuf[8];
1549            sbuf[0] = (true_size >> 24)  & 255;
1550            sbuf[1] = (true_size >> 16)  & 255;
1551            sbuf[2] = (true_size >>  8)  & 255;
1552            sbuf[3] = (true_size)  & 255;
1553    
1554            sbuf[4] = (true_study_size >> 24)  & 255;
1555            sbuf[5] = (true_study_size >> 16)  & 255;
1556            sbuf[6] = (true_study_size >>  8)  & 255;
1557            sbuf[7] = (true_study_size)  & 255;
1558    
1559            if (fwrite(sbuf, 1, 8, f) < 8 ||
1560                fwrite(re, 1, true_size, f) < true_size)
1561              {
1562              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1563              }
1564            else
1565              {
1566              fprintf(outfile, "Compiled regex written to %s\n", to_file);
1567              if (extra != NULL)
1568                {
1569                if (fwrite(extra->study_data, 1, true_study_size, f) <
1570                    true_study_size)
1571                  {
1572                  fprintf(outfile, "Write error on %s: %s\n", to_file,
1573                    strerror(errno));
1574                  }
1575                else fprintf(outfile, "Study data written to %s\n", to_file);
1576    
1577                }
1578              }
1579            fclose(f);
1580            }
1581    
1582          new_free(re);
1583          if (extra != NULL) new_free(extra);
1584          if (tables != NULL) new_free((void *)tables);
1585          continue;  /* With next regex */
1586          }
1587        }        /* End of non-POSIX compile */
1588    
1589    /* Read data lines and test them */    /* Read data lines and test them */
1590    
1591    for (;;)    for (;;)
1592      {      {
1593      unsigned char *q;      uschar *q;
1594      unsigned char *bptr = dbuffer;      uschar *bptr = dbuffer;
1595      int *use_offsets = offsets;      int *use_offsets = offsets;
1596      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1597      int callout_data = 0;      int callout_data = 0;
# Line 961  while (!done) Line 1604  while (!done)
1604      int gmatched = 0;      int gmatched = 0;
1605      int start_offset = 0;      int start_offset = 0;
1606      int g_notempty = 0;      int g_notempty = 0;
1607        int use_dfa = 0;
1608    
1609      options = 0;      options = 0;
1610    
1611        *copynames = 0;
1612        *getnames = 0;
1613    
1614        copynamesptr = copynames;
1615        getnamesptr = getnames;
1616    
1617      pcre_callout = callout;      pcre_callout = callout;
1618      first_callout = 1;      first_callout = 1;
1619      callout_extra = 0;      callout_extra = 0;
# Line 972  while (!done) Line 1622  while (!done)
1622      callout_fail_id = -1;      callout_fail_id = -1;
1623      show_malloc = 0;      show_malloc = 0;
1624    
1625      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1626      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1627    
1628        len = 0;
1629        for (;;)
1630        {        {
1631        done = 1;        if (infile == stdin) printf("data> ");
1632        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1633            {
1634            if (len > 0) break;
1635            done = 1;
1636            goto CONTINUE;
1637            }
1638          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1639          len = (int)strlen((char *)buffer);
1640          if (buffer[len-1] == '\n') break;
1641        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1642    
     len = (int)strlen((char *)buffer);  
1643      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1644      buffer[len] = 0;      buffer[len] = 0;
1645      if (len == 0) break;      if (len == 0) break;
# Line 1010  while (!done) Line 1669  while (!done)
1669          c -= '0';          c -= '0';
1670          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1671            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1672    
1673    #if !defined NOUTF8
1674            if (use_utf8 && c > 255)
1675              {
1676              unsigned char buff8[8];
1677              int ii, utn;
1678              utn = ord2utf8(c, buff8);
1679              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1680              c = buff8[ii];   /* Last byte */
1681              }
1682    #endif
1683          break;          break;
1684    
1685          case 'x':          case 'x':
1686    
1687          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1688    
1689    #if !defined NOUTF8
1690          if (*p == '{')          if (*p == '{')
1691            {            {
1692            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1034  while (!done) Line 1705  while (!done)
1705              }              }
1706            /* Not correct form; fall through */            /* Not correct form; fall through */
1707            }            }
1708    #endif
1709    
1710          /* Ordinary \x */          /* Ordinary \x */
1711    
# Line 1045  while (!done) Line 1717  while (!done)
1717            }            }
1718          break;          break;
1719    
1720          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1721          p--;          p--;
1722          continue;          continue;
1723    
1724            case '>':
1725            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1726            continue;
1727    
1728          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1729          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1730          continue;          continue;
# Line 1065  while (!done) Line 1741  while (!done)
1741            }            }
1742          else if (isalnum(*p))          else if (isalnum(*p))
1743            {            {
1744            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1745            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1746              *npp++ = 0;
1747            *npp = 0;            *npp = 0;
1748            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1749            if (n < 0)            if (n < 0)
1750              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1751            else copystrings |= 1 << n;            copynamesptr = npp;
1752            }            }
1753          else if (*p == '+')          else if (*p == '+')
1754            {            {
# Line 1110  while (!done) Line 1786  while (!done)
1786            }            }
1787          continue;          continue;
1788    
1789    #if !defined NODFA
1790            case 'D':
1791    #if !defined NOPOSIX
1792            if (posix || do_posix)
1793              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1794            else
1795    #endif
1796              use_dfa = 1;
1797            continue;
1798    
1799            case 'F':
1800            options |= PCRE_DFA_SHORTEST;
1801            continue;
1802    #endif
1803    
1804          case 'G':          case 'G':
1805          if (isdigit(*p))          if (isdigit(*p))
1806            {            {
# Line 1118  while (!done) Line 1809  while (!done)
1809            }            }
1810          else if (isalnum(*p))          else if (isalnum(*p))
1811            {            {
1812            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1813            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1814              *npp++ = 0;
1815            *npp = 0;            *npp = 0;
1816            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1817            if (n < 0)            if (n < 0)
1818              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1819            else getstrings |= 1 << n;            getnamesptr = npp;
1820            }            }
1821          continue;          continue;
1822    
# Line 1152  while (!done) Line 1843  while (!done)
1843              {              {
1844              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1845                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1846              return 1;              yield = 1;
1847                goto EXIT;
1848              }              }
1849            }            }
1850          use_size_offsets = n;          use_size_offsets = n;
1851          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1852          continue;          continue;
1853    
1854            case 'P':
1855            options |= PCRE_PARTIAL;
1856            continue;
1857    
1858            case 'Q':
1859            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1860            if (extra == NULL)
1861              {
1862              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1863              extra->flags = 0;
1864              }
1865            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1866            extra->match_limit_recursion = n;
1867            continue;
1868    
1869            case 'q':
1870            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871            if (extra == NULL)
1872              {
1873              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874              extra->flags = 0;
1875              }
1876            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1877            extra->match_limit = n;
1878            continue;
1879    
1880    #if !defined NODFA
1881            case 'R':
1882            options |= PCRE_DFA_RESTART;
1883            continue;
1884    #endif
1885    
1886          case 'S':          case 'S':
1887          show_malloc = 1;          show_malloc = 1;
1888          continue;          continue;
# Line 1170  while (!done) Line 1894  while (!done)
1894          case '?':          case '?':
1895          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
1896          continue;          continue;
1897    
1898            case '<':
1899              {
1900              int x = check_newline(p, outfile);
1901              if (x == 0) goto NEXT_DATA;
1902              options |= x;
1903              while (*p++ != '>');
1904              }
1905            continue;
1906          }          }
1907        *q++ = c;        *q++ = c;
1908        }        }
1909      *q = 0;      *q = 0;
1910      len = q - dbuffer;      len = q - dbuffer;
1911    
1912        if ((all_use_dfa || use_dfa) && find_match_limit)
1913          {
1914          printf("**Match limit not relevant for DFA matching: ignored\n");
1915          find_match_limit = 0;
1916          }
1917    
1918      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1919      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1920    
# Line 1194  while (!done) Line 1933  while (!done)
1933    
1934        if (rc != 0)        if (rc != 0)
1935          {          {
1936          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1937          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1938          }          }
1939          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1940                  != 0)
1941            {
1942            fprintf(outfile, "Matched with REG_NOSUB\n");
1943            }
1944        else        else
1945          {          {
1946          size_t i;          size_t i;
# Line 1228  while (!done) Line 1972  while (!done)
1972    
1973      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1974        {        {
1975        if (timeit)        if (timeitm > 0)
1976          {          {
1977          register int i;          register int i;
1978          clock_t time_taken;          clock_t time_taken;
1979          clock_t start_time = clock();          clock_t start_time = clock();
1980          for (i = 0; i < LOOPREPEAT; i++)  
1981    #if !defined NODFA
1982            if (all_use_dfa || use_dfa)
1983              {
1984              int workspace[1000];
1985              for (i = 0; i < timeitm; i++)
1986                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1987                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1988                  sizeof(workspace)/sizeof(int));
1989              }
1990            else
1991    #endif
1992    
1993            for (i = 0; i < timeitm; i++)
1994            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1995              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1996    
1997          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1998          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
1999            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2000              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2001          }          }
2002    
2003        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2004        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2005          for the recursion limit. */
2006    
2007        if (find_match_limit)        if (find_match_limit)
2008          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2009          if (extra == NULL)          if (extra == NULL)
2010            {            {
2011            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2012            extra->flags = 0;            extra->flags = 0;
2013            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
2014    
2015          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
2016            {            options|g_notempty, use_offsets, use_size_offsets,
2017            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2018            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
2019              options | g_notempty, use_offsets, use_size_offsets);  
2020            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
2021              {            options|g_notempty, use_offsets, use_size_offsets,
2022              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2023              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
2024          }          }
2025    
2026        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1305  while (!done) Line 2042  while (!done)
2042        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2043        value of match_limit. */        value of match_limit. */
2044    
2045        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
2046          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
2047            {
2048            int workspace[1000];
2049            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2050              options | g_notempty, use_offsets, use_size_offsets, workspace,
2051              sizeof(workspace)/sizeof(int));
2052            if (count == 0)
2053              {
2054              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2055              count = use_size_offsets/2;
2056              }
2057            }
2058    #endif
2059    
2060        if (count == 0)        else
2061          {          {
2062          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2063          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2064            if (count == 0)
2065              {
2066              fprintf(outfile, "Matched, but too many substrings\n");
2067              count = use_size_offsets/3;
2068              }
2069          }          }
2070    
2071        /* Matched */        /* Matched */
2072    
2073        if (count >= 0)        if (count >= 0)
2074          {          {
2075          int i;          int i, maxcount;
2076    
2077    #if !defined NODFA
2078            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2079    #endif
2080              maxcount = use_size_offsets/3;
2081    
2082            /* This is a check against a lunatic return value. */
2083    
2084            if (count > maxcount)
2085              {
2086              fprintf(outfile,
2087                "** PCRE error: returned count %d is too big for offset size %d\n",
2088                count, use_size_offsets);
2089              count = use_size_offsets/3;
2090              if (do_g || do_G)
2091                {
2092                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2093                do_g = do_G = FALSE;        /* Break g/G loop */
2094                }
2095              }
2096    
2097          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2098            {            {
2099            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1346  while (!done) Line 2121  while (!done)
2121            {            {
2122            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2123              {              {
2124              char copybuffer[16];              char copybuffer[256];
2125              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2126                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2127              if (rc < 0)              if (rc < 0)
# Line 1356  while (!done) Line 2131  while (!done)
2131              }              }
2132            }            }
2133    
2134            for (copynamesptr = copynames;
2135                 *copynamesptr != 0;
2136                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2137              {
2138              char copybuffer[256];
2139              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2140                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2141              if (rc < 0)
2142                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2143              else
2144                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2145              }
2146    
2147          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2148            {            {
2149            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1368  while (!done) Line 2156  while (!done)
2156              else              else
2157                {                {
2158                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2159                pcre_free_substring(substring);                pcre_free_substring(substring);
2160                }                }
2161              }              }
2162            }            }
2163    
2164            for (getnamesptr = getnames;
2165                 *getnamesptr != 0;
2166                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2167              {
2168              const char *substring;
2169              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2170                count, (char *)getnamesptr, &substring);
2171              if (rc < 0)
2172                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2173              else
2174                {
2175                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2176                pcre_free_substring(substring);
2177                }
2178              }
2179    
2180          if (getlist)          if (getlist)
2181            {            {
2182            const char **stringlist;            const char **stringlist;
# Line 1393  while (!done) Line 2196  while (!done)
2196            }            }
2197          }          }
2198    
2199          /* There was a partial match */
2200    
2201          else if (count == PCRE_ERROR_PARTIAL)
2202            {
2203            fprintf(outfile, "Partial match");
2204    #if !defined NODFA
2205            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2206              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2207                bptr + use_offsets[0]);
2208    #endif
2209            fprintf(outfile, "\n");
2210            break;  /* Out of the /g loop */
2211            }
2212    
2213        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2214        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
2215        We want to advance the start offset, and continue. In the case of UTF-8        We want to advance the start offset, and continue. In the case of UTF-8
# Line 1459  while (!done) Line 2276  while (!done)
2276          len -= use_offsets[1];          len -= use_offsets[1];
2277          }          }
2278        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2279    
2280        NEXT_DATA: continue;
2281      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2282    
2283    CONTINUE:    CONTINUE:
# Line 1467  while (!done) Line 2286  while (!done)
2286    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2287  #endif  #endif
2288    
2289    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2290    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2291    if (tables != NULL)    if (tables != NULL)
2292      {      {
2293      free((void *)tables);      new_free((void *)tables);
2294      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2295        locale_set = 0;
2296      }      }
2297    }    }
2298    
2299  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2300  return 0;  
2301    EXIT:
2302    
2303    if (infile != NULL && infile != stdin) fclose(infile);
2304    if (outfile != NULL && outfile != stdout) fclose(outfile);
2305    
2306    free(buffer);
2307    free(dbuffer);
2308    free(pbuffer);
2309    free(offsets);
2310    
2311    return yield;
2312  }  }
2313    
2314  /* End */  /* End of pcretest.c */

Legend:
Removed from v.73  
changed lines
  Added in v.123

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12