/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC revision 169 by ph10, Mon Jun 4 10:49:21 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places.  been extended and consequently is now rather, er, *very* untidy in places.
8    
9  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
10  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 44  POSSIBILITY OF SUCH DAMAGE.
44  #include <locale.h>  #include <locale.h>
45  #include <errno.h>  #include <errno.h>
46    
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
47    
48  #define PCRE_SPY        /* For Win32 build, import data, not export */  /* A number of things vary for Windows builds. Originally, pcretest opened its
49  #include "internal.h"  input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
69    
70    /* We have to include pcre_internal.h because we need the internal info for
71    displaying the results of pcre_study() and we also need to know about the
72    internal macros, structures, and other internal data values; pcretest has
73    "inside information" compared to a program that strictly follows the PCRE API.
74    
75    Although pcre_internal.h does itself include pcre.h, we explicitly include it
76    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
77    appropriately for an application, not for building PCRE. */
78    
79    #include "pcre.h"
80    #include "pcre_internal.h"
81    
82    /* We need access to the data tables that PCRE uses. So as not to have to keep
83    two copies, we include the source file here, changing the names of the external
84    symbols to prevent clashes. */
85    
86    #define _pcre_utf8_table1      utf8_table1
87    #define _pcre_utf8_table1_size utf8_table1_size
88    #define _pcre_utf8_table2      utf8_table2
89    #define _pcre_utf8_table3      utf8_table3
90    #define _pcre_utf8_table4      utf8_table4
91    #define _pcre_utt              utt
92    #define _pcre_utt_size         utt_size
93    #define _pcre_OP_lengths       OP_lengths
94    
95    #include "pcre_tables.c"
96    
97    /* We also need the pcre_printint() function for printing out compiled
98    patterns. This function is in a separate file so that it can be included in
99    pcre_compile.c when that module is compiled with debugging enabled.
100    
101    The definition of the macro PRINTABLE, which determines whether to print an
102    output character as-is or as a hex value when showing compiled patterns, is
103    contained in this file. We use it here also, in cases when the locale has not
104    been explicitly changed, so as to get consistent output from systems that
105    differ in their output from isprint() even in the "C" locale. */
106    
107    #include "pcre_printint.src"
108    
109    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
110    
111    
112  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
113  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 58  Makefile. */ Line 117  Makefile. */
117  #include "pcreposix.h"  #include "pcreposix.h"
118  #endif  #endif
119    
120    /* It is also possible, for the benefit of the version currently imported into
121    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
122    interface to the DFA matcher (NODFA), and without the doublecheck of the old
123    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
124    UTF8 support if PCRE is built without it. */
125    
126    #ifndef SUPPORT_UTF8
127    #ifndef NOUTF8
128    #define NOUTF8
129    #endif
130    #endif
131    
132    
133    /* Other parameters */
134    
135  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
136  #ifdef CLK_TCK  #ifdef CLK_TCK
137  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 66  Makefile. */ Line 140  Makefile. */
140  #endif  #endif
141  #endif  #endif
142    
143  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
144    
145  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
146    
147    /* Static variables */
148    
149  static FILE *outfile;  static FILE *outfile;
150  static int log_store = 0;  static int log_store = 0;
# Line 80  static int callout_extra; Line 153  static int callout_extra;
153  static int callout_fail_count;  static int callout_fail_count;
154  static int callout_fail_id;  static int callout_fail_id;
155  static int first_callout;  static int first_callout;
156    static int locale_set = 0;
157  static int show_malloc;  static int show_malloc;
158  static int use_utf8;  static int use_utf8;
159  static size_t gotten_store;  static size_t gotten_store;
160    
161    /* The buffers grow automatically if very long input lines are encountered. */
162    
163    static int buffer_size = 50000;
164    static uschar *buffer = NULL;
165    static uschar *dbuffer = NULL;
166  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
167    
168    
 static const int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
169    
170  static const int utf8_table2[] = {  /*************************************************
171    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  *        Read or extend an input line            *
172    *************************************************/
173    
174    /* Input lines are read into buffer, but both patterns and data lines can be
175    continued over multiple input lines. In addition, if the buffer fills up, we
176    want to automatically expand it so as to be able to handle extremely large
177    lines that are needed for certain stress tests. When the input buffer is
178    expanded, the other two buffers must also be expanded likewise, and the
179    contents of pbuffer, which are a copy of the input for callouts, must be
180    preserved (for when expansion happens for a data line). This is not the most
181    efficient way of handling this, but hey, this is just a test program!
182    
183    Arguments:
184      f            the file to read
185      start        where in buffer to start (this *must* be within buffer)
186    
187    Returns:       pointer to the start of new data
188                   could be a copy of start, or could be moved
189                   NULL if no data read and EOF reached
190    */
191    
192  static const int utf8_table3[] = {  static uschar *
193    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  extend_inputline(FILE *f, uschar *start)
194    {
195    uschar *here = start;
196    
197    for (;;)
198      {
199      int rlen = buffer_size - (here - buffer);
200    
201      if (rlen > 1000)
202        {
203        int dlen;
204        if (fgets((char *)here, rlen,  f) == NULL)
205          return (here == start)? NULL : start;
206        dlen = (int)strlen((char *)here);
207        if (dlen > 0 && here[dlen - 1] == '\n') return start;
208        here += dlen;
209        }
210    
211      else
212        {
213        int new_buffer_size = 2*buffer_size;
214        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
215        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
216        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
217    
218        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
219          {
220          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
221          exit(1);
222          }
223    
224        memcpy(new_buffer, buffer, buffer_size);
225        memcpy(new_pbuffer, pbuffer, buffer_size);
226    
227        buffer_size = new_buffer_size;
228    
229        start = new_buffer + (start - buffer);
230        here = new_buffer + (here - buffer);
231    
232        free(buffer);
233        free(dbuffer);
234        free(pbuffer);
235    
236        buffer = new_buffer;
237        dbuffer = new_dbuffer;
238        pbuffer = new_pbuffer;
239        }
240      }
241    
242    return NULL;  /* Control never gets here */
243    }
244    
245    
246    
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
247    
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates  
 Unicode property names to numbers; this is kept in a separate file. */  
   
 static uschar OP_lengths[] = { OP_LENGTHS };  
   
 #include "ucp.h"  
 #include "ucptypetable.c"  
 #include "printint.c"  
248    
249    
250    
# Line 122  static uschar OP_lengths[] = { OP_LENGTH Line 254  static uschar OP_lengths[] = { OP_LENGTH
254    
255  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
256  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
257  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
258    
259  Arguments:  Arguments:
260    str           string to be converted    str           string to be converted
# Line 143  return(result); Line 275  return(result);
275    
276    
277    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
278    
279  /*************************************************  /*************************************************
280  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 188  return i + 1; Line 284  return i + 1;
284  and returns the value of the character.  and returns the value of the character.
285    
286  Argument:  Argument:
287    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
288    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
289    
290  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
291             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
292  */  */
293    
294    #if !defined NOUTF8
295    
296  static int  static int
297  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
298  {  {
299  int c = *buffer++;  int c = *utf8bytes++;
300  int d = c;  int d = c;
301  int i, j, s;  int i, j, s;
302    
# Line 218  d = (c & utf8_table3[i]) << s; Line 316  d = (c & utf8_table3[i]) << s;
316    
317  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
318    {    {
319    c = *buffer++;    c = *utf8bytes++;
320    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
321    s -= 6;    s -= 6;
322    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 226  for (j = 0; j < i; j++) Line 324  for (j = 0; j < i; j++)
324    
325  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
326    
327  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
328    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
329  if (j != i) return -(i+1);  if (j != i) return -(i+1);
330    
# Line 236  if (j != i) return -(i+1); Line 334  if (j != i) return -(i+1);
334  return i+1;  return i+1;
335  }  }
336    
337    #endif
338    
339    
340    
341    /*************************************************
342    *       Convert character value to UTF-8         *
343    *************************************************/
344    
345    /* This function takes an integer value in the range 0 - 0x7fffffff
346    and encodes it as a UTF-8 character in 1 to 6 bytes.
347    
348    Arguments:
349      cvalue     the character value
350      utf8bytes  pointer to buffer for result - at least 6 bytes long
351    
352    Returns:     number of bytes placed in the buffer
353    */
354    
355    #if !defined NOUTF8
356    
357    static int
358    ord2utf8(int cvalue, uschar *utf8bytes)
359    {
360    register int i, j;
361    for (i = 0; i < utf8_table1_size; i++)
362      if (cvalue <= utf8_table1[i]) break;
363    utf8bytes += i;
364    for (j = i; j > 0; j--)
365     {
366     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
367     cvalue >>= 6;
368     }
369    *utf8bytes = utf8_table2[i] | cvalue;
370    return i + 1;
371    }
372    
373    #endif
374    
375    
376    
377  /*************************************************  /*************************************************
# Line 248  chars without printing. */ Line 384  chars without printing. */
384    
385  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
386  {  {
387  int c;  int c = 0;
388  int yield = 0;  int yield = 0;
389    
390  while (length-- > 0)  while (length-- > 0)
391    {    {
392    #if !defined NOUTF8
393    if (use_utf8)    if (use_utf8)
394      {      {
395      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 261  while (length-- > 0) Line 398  while (length-- > 0)
398        {        {
399        length -= rc - 1;        length -= rc - 1;
400        p += rc;        p += rc;
401        if (c < 256 && isprint(c))        if (PRINTHEX(c))
402          {          {
403          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
404          yield++;          yield++;
405          }          }
406        else        else
407          {          {
408          int n;          int n = 4;
409          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
410          yield += n;          yield += (n <= 0x000000ff)? 2 :
411                     (n <= 0x00000fff)? 3 :
412                     (n <= 0x0000ffff)? 4 :
413                     (n <= 0x000fffff)? 5 : 6;
414          }          }
415        continue;        continue;
416        }        }
417      }      }
418    #endif
419    
420     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
421    
422    if (isprint(c = *(p++)))    c = *p++;
423      if (PRINTHEX(c))
424      {      {
425      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
426      yield++;      yield++;
# Line 403  static void *new_malloc(size_t size) Line 545  static void *new_malloc(size_t size)
545  void *block = malloc(size);  void *block = malloc(size);
546  gotten_store = size;  gotten_store = size;
547  if (show_malloc)  if (show_malloc)
548    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
549  return block;  return block;
550  }  }
551    
# Line 421  static void *stack_malloc(size_t size) Line 563  static void *stack_malloc(size_t size)
563  {  {
564  void *block = malloc(size);  void *block = malloc(size);
565  if (show_malloc)  if (show_malloc)
566    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
567  return block;  return block;
568  }  }
569    
# Line 452  if ((rc = pcre_fullinfo(re, study, optio Line 594  if ((rc = pcre_fullinfo(re, study, optio
594  *         Byte flipping function                 *  *         Byte flipping function                 *
595  *************************************************/  *************************************************/
596    
597  static long int  static unsigned long int
598  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
599  {  {
600  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
601  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 466  return ((value & 0x000000ff) << 24) | Line 608  return ((value & 0x000000ff) << 24) |
608    
609    
610  /*************************************************  /*************************************************
611    *        Check match or recursion limit          *
612    *************************************************/
613    
614    static int
615    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
616      int start_offset, int options, int *use_offsets, int use_size_offsets,
617      int flag, unsigned long int *limit, int errnumber, const char *msg)
618    {
619    int count;
620    int min = 0;
621    int mid = 64;
622    int max = -1;
623    
624    extra->flags |= flag;
625    
626    for (;;)
627      {
628      *limit = mid;
629    
630      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
631        use_offsets, use_size_offsets);
632    
633      if (count == errnumber)
634        {
635        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
636        min = mid;
637        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
638        }
639    
640      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
641                             count == PCRE_ERROR_PARTIAL)
642        {
643        if (mid == min + 1)
644          {
645          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
646          break;
647          }
648        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
649        max = mid;
650        mid = (min + mid)/2;
651        }
652      else break;    /* Some other error */
653      }
654    
655    extra->flags &= ~flag;
656    return count;
657    }
658    
659    
660    
661    /*************************************************
662    *         Check newline indicator                *
663    *************************************************/
664    
665    /* This is used both at compile and run-time to check for <xxx> escapes, where
666    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
667    no match.
668    
669    Arguments:
670      p           points after the leading '<'
671      f           file for error message
672    
673    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
674    */
675    
676    static int
677    check_newline(uschar *p, FILE *f)
678    {
679    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
680    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
681    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
682    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
683    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
684    fprintf(f, "Unknown newline type at: <%s\n", p);
685    return 0;
686    }
687    
688    
689    
690    /*************************************************
691    *             Usage function                     *
692    *************************************************/
693    
/* Write the command-line summary to stdout. The -dfa and -p lines are left
out when NODFA or NOPOSIX, respectively, is defined. The text contains no
conversions, so it is written with fputs(). */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n",
      stdout);
fputs("  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
719    
720    
721    
722    /*************************************************
723  *                Main Program                    *  *                Main Program                    *
724  *************************************************/  *************************************************/
725    
# Line 480  int options = 0; Line 734  int options = 0;
734  int study_options = 0;  int study_options = 0;
735  int op = 1;  int op = 1;
736  int timeit = 0;  int timeit = 0;
737    int timeitm = 0;
738  int showinfo = 0;  int showinfo = 0;
739  int showstore = 0;  int showstore = 0;
740    int quiet = 0;
741  int size_offsets = 45;  int size_offsets = 45;
742  int size_offsets_max;  int size_offsets_max;
743  int *offsets;  int *offsets = NULL;
744  #if !defined NOPOSIX  #if !defined NOPOSIX
745  int posix = 0;  int posix = 0;
746  #endif  #endif
747  int debug = 0;  int debug = 0;
748  int done = 0;  int done = 0;
749    int all_use_dfa = 0;
750    int yield = 0;
751    int stack_size;
752    
753    /* These vectors store, end-to-end, a list of captured substring names. Assume
754    that 1024 is plenty long enough for the few names we'll be testing. */
755    
756  unsigned char *buffer;  uschar copynames[1024];
757  unsigned char *dbuffer;  uschar getnames[1024];
758    
759    uschar *copynamesptr;
760    uschar *getnamesptr;
761    
762  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
763  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
764    
765  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
766  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
767  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
768    
769  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
770    
771  outfile = stdout;  outfile = stdout;
772    
773    /* The following _setmode() stuff is some Windows magic that puts stdout into
774    binary mode, so its runtime library does not turn LF into CRLF on output. At
775    least, that's what I've been told: never having used Windows I take this all
776    on trust. Originally it set 0x8000, but then I was advised to use _O_BINARY. */
777    
778    #if defined(_WIN32) || defined(WIN32)
779    _setmode( _fileno( stdout ), _O_BINARY );
780    #endif
781    
782  /* Scan options */  /* Scan options */
783    
784  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 519  while (argc > 1 && argv[op][0] == '-') Line 787  while (argc > 1 && argv[op][0] == '-')
787    
788    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
789      showstore = 1;      showstore = 1;
790    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
791      else if (strcmp(argv[op], "-b") == 0) debug = 1;
792    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
793    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
794    #if !defined NODFA
795      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
796    #endif
797    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
798        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
799          *endptr == 0))          *endptr == 0))
# Line 529  while (argc > 1 && argv[op][0] == '-') Line 801  while (argc > 1 && argv[op][0] == '-')
801      op++;      op++;
802      argc--;      argc--;
803      }      }
804      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
805        {
806        int both = argv[op][2] == 0;
807        int temp;
808        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
809                         *endptr == 0))
810          {
811          timeitm = temp;
812          op++;
813          argc--;
814          }
815        else timeitm = LOOPREPEAT;
816        if (both) timeit = timeitm;
817        }
818      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
819          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
820            *endptr == 0))
821        {
822    #if defined(_WIN32) || defined(WIN32)
823        printf("PCRE: -S not supported on this OS\n");
824        exit(1);
825    #else
826        int rc;
827        struct rlimit rlim;
828        getrlimit(RLIMIT_STACK, &rlim);
829        rlim.rlim_cur = stack_size * 1024 * 1024;
830        rc = setrlimit(RLIMIT_STACK, &rlim);
831        if (rc != 0)
832          {
833        printf("PCRE: setrlimit() failed with error %d\n", rc);
834        exit(1);
835          }
836        op++;
837        argc--;
838    #endif
839        }
840  #if !defined NOPOSIX  #if !defined NOPOSIX
841    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
842  #endif  #endif
# Line 542  while (argc > 1 && argv[op][0] == '-') Line 850  while (argc > 1 && argv[op][0] == '-')
850      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
851      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
852      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
853      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
854          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
855          (rc == -2)? "ANYCRLF" :
856          (rc == -1)? "ANY" : "???");
857      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
858      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
859      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
860      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
861      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
862      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
863        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
864        printf("  Default recursion depth limit = %d\n", rc);
865      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
866      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
867      exit(0);      goto EXIT;
868        }
869      else if (strcmp(argv[op], "-help") == 0 ||
870               strcmp(argv[op], "--help") == 0)
871        {
872        usage();
873        goto EXIT;
874      }      }
875    else    else
876      {      {
877      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
878      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
879      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
880      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
881      }      }
882    op++;    op++;
883    argc--;    argc--;
# Line 580  offsets = (int *)malloc(size_offsets_max Line 890  offsets = (int *)malloc(size_offsets_max
890  if (offsets == NULL)  if (offsets == NULL)
891    {    {
892    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
893      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
894    return 1;    yield = 1;
895      goto EXIT;
896    }    }
897    
898  /* Sort out the input and output files */  /* Sort out the input and output files */
899    
900  if (argc > 1)  if (argc > 1)
901    {    {
902    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
903    if (infile == NULL)    if (infile == NULL)
904      {      {
905      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
906      return 1;      yield = 1;
907        goto EXIT;
908      }      }
909    }    }
910    
911  if (argc > 2)  if (argc > 2)
912    {    {
913    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
914    if (outfile == NULL)    if (outfile == NULL)
915      {      {
916      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
917      return 1;      yield = 1;
918        goto EXIT;
919      }      }
920    }    }
921    
# Line 613  pcre_free = new_free; Line 926  pcre_free = new_free;
926  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
927  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
928    
929  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
930    
931  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
932    
933  /* Main loop */  /* Main loop */
934    
# Line 637  while (!done) Line 950  while (!done)
950    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
951    int do_study = 0;    int do_study = 0;
952    int do_debug = debug;    int do_debug = debug;
953      int debug_lengths = 1;
954    int do_G = 0;    int do_G = 0;
955    int do_g = 0;    int do_g = 0;
956    int do_showinfo = showinfo;    int do_showinfo = showinfo;
957    int do_showrest = 0;    int do_showrest = 0;
958    int do_flip = 0;    int do_flip = 0;
959    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
960    
961    use_utf8 = 0;    use_utf8 = 0;
962    
963    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
964    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
965    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
966    fflush(outfile);    fflush(outfile);
967    
# Line 659  while (!done) Line 973  while (!done)
973    
974    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
975      {      {
976      unsigned long int magic;      unsigned long int magic, get_options;
977      uschar sbuf[8];      uschar sbuf[8];
978      FILE *f;      FILE *f;
979    
# Line 707  while (!done) Line 1021  while (!done)
1021    
1022      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1023    
1024      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1025      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1026    
1027      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1028    
# Line 752  while (!done) Line 1066  while (!done)
1066      }      }
1067    
1068    pp = p;    pp = p;
1069      poffset = p - buffer;
1070    
1071    for(;;)    for(;;)
1072      {      {
# Line 762  while (!done) Line 1077  while (!done)
1077        pp++;        pp++;
1078        }        }
1079      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1080      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1081      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1082        {        {
1083        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1084        done = 1;        done = 1;
# Line 780  while (!done) Line 1087  while (!done)
1087      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1088      }      }
1089    
1090      /* The buffer may have moved while being extended; reset the start of data
1091      pointer to the correct relative point in the buffer. */
1092    
1093      p = buffer + poffset;
1094    
1095    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1096    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1097    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 802  while (!done) Line 1114  while (!done)
1114      {      {
1115      switch (*pp++)      switch (*pp++)
1116        {        {
1117          case 'f': options |= PCRE_FIRSTLINE; break;
1118        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1119        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1120        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 810  while (!done) Line 1123  while (!done)
1123    
1124        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1125        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1126          case 'B': do_debug = 1; break;
1127        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1128        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1129        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1130        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1131        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1132        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1133          case 'J': options |= PCRE_DUPNAMES; break;
1134        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1135        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1136    
# Line 826  while (!done) Line 1141  while (!done)
1141        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1142        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1143        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1144          case 'Z': debug_lengths = 0; break;
1145        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1146        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1147    
1148        case 'L':        case 'L':
1149        ppp = pp;        ppp = pp;
1150        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1151          /* The '0' test is just in case this is an unterminated line. */
1152          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1153        *ppp = 0;        *ppp = 0;
1154        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1155          {          {
1156          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1157          goto SKIP_DATA;          goto SKIP_DATA;
1158          }          }
1159          locale_set = 1;
1160        tables = pcre_maketables();        tables = pcre_maketables();
1161        pp = ppp;        pp = ppp;
1162        break;        break;
# Line 849  while (!done) Line 1168  while (!done)
1168        *pp = 0;        *pp = 0;
1169        break;        break;
1170    
1171        case '\n': case ' ': break;        case '<':
1172            {
1173            int x = check_newline(pp, outfile);
1174            if (x == 0) goto SKIP_DATA;
1175            options |= x;
1176            while (*pp++ != '>');
1177            }
1178          break;
1179    
1180          case '\r':                      /* So that it works in Windows */
1181          case '\n':
1182          case ' ':
1183          break;
1184    
1185        default:        default:
1186        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
# Line 869  while (!done) Line 1200  while (!done)
1200    
1201      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1202      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1203        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1204        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1205        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1206    
1207      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1208    
1209      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 876  while (!done) Line 1211  while (!done)
1211    
1212      if (rc != 0)      if (rc != 0)
1213        {        {
1214        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1215        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1216        goto SKIP_DATA;        goto SKIP_DATA;
1217        }        }
# Line 888  while (!done) Line 1223  while (!done)
1223  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1224    
1225      {      {
1226      if (timeit)      if (timeit > 0)
1227        {        {
1228        register int i;        register int i;
1229        clock_t time_taken;        clock_t time_taken;
1230        clock_t start_time = clock();        clock_t start_time = clock();
1231        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1232          {          {
1233          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1234          if (re != NULL) free(re);          if (re != NULL) free(re);
1235          }          }
1236        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1237        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1238          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1239            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1240        }        }
1241    
# Line 917  while (!done) Line 1252  while (!done)
1252          {          {
1253          for (;;)          for (;;)
1254            {            {
1255            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1256              {              {
1257              done = 1;              done = 1;
1258              goto CONTINUE;              goto CONTINUE;
# Line 952  while (!done) Line 1287  while (!done)
1287    
1288      if (do_study)      if (do_study)
1289        {        {
1290        if (timeit)        if (timeit > 0)
1291          {          {
1292          register int i;          register int i;
1293          clock_t time_taken;          clock_t time_taken;
1294          clock_t start_time = clock();          clock_t start_time = clock();
1295          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1296            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1297          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1298          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1299          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1300            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1301              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1302          }          }
1303        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1005  while (!done) Line 1340  while (!done)
1340    
1341      SHOW_INFO:      SHOW_INFO:
1342    
1343        if (do_debug)
1344          {
1345          fprintf(outfile, "------------------------------------------------------------------\n");
1346          pcre_printint(re, outfile, debug_lengths);
1347          }
1348    
1349      if (do_showinfo)      if (do_showinfo)
1350        {        {
1351        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1352    #if !defined NOINFOCHECK
1353        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1354        int count, backrefmax, first_char, need_char;  #endif
1355          int count, backrefmax, first_char, need_char, okpartial, jchanged;
1356        int nameentrysize, namecount;        int nameentrysize, namecount;
1357        const uschar *nametable;        const uschar *nametable;
1358    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
   
1359        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1360        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1361        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1028  while (!done) Line 1365  while (!done)
1365        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1366        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1367        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1368          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1369          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1370    
1371    #if !defined NOINFOCHECK
1372        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1373        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1374          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 1046  while (!done) Line 1386  while (!done)
1386            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1387              get_options, old_options);              get_options, old_options);
1388          }          }
1389    #endif
1390    
1391        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1392          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1393          size, regex_gotten_store);          (int)size, (int)regex_gotten_store);
1394    
1395        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1396        if (backrefmax > 0)        if (backrefmax > 0)
# Line 1066  while (!done) Line 1407  while (!done)
1407            nametable += nameentrysize;            nametable += nameentrysize;
1408            }            }
1409          }          }
1410    
1411        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
       to fish it out via out back door */  
1412    
1413        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1414        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1415    
1416        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1417          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1418            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1419            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1420            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1421            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1422              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1423            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1424            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1425            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1426            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1427              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1428            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1429            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1430              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1431    
1432          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1433    
1434        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        switch (get_options & PCRE_NEWLINE_BITS)
1435          fprintf(outfile, "Case state changes\n");          {
1436            case PCRE_NEWLINE_CR:
1437            fprintf(outfile, "Forced newline sequence: CR\n");
1438            break;
1439    
1440            case PCRE_NEWLINE_LF:
1441            fprintf(outfile, "Forced newline sequence: LF\n");
1442            break;
1443    
1444            case PCRE_NEWLINE_CRLF:
1445            fprintf(outfile, "Forced newline sequence: CRLF\n");
1446            break;
1447    
1448            case PCRE_NEWLINE_ANYCRLF:
1449            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1450            break;
1451    
1452            case PCRE_NEWLINE_ANY:
1453            fprintf(outfile, "Forced newline sequence: ANY\n");
1454            break;
1455    
1456            default:
1457            break;
1458            }
1459    
1460        if (first_char == -1)        if (first_char == -1)
1461          {          {
1462          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1463          }          }
1464        else if (first_char < 0)        else if (first_char < 0)
1465          {          {
# Line 1108  while (!done) Line 1470  while (!done)
1470          int ch = first_char & 255;          int ch = first_char & 255;
1471          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1472            "" : " (caseless)";            "" : " (caseless)";
1473          if (isprint(ch))          if (PRINTHEX(ch))
1474            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1475          else          else
1476            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1123  while (!done) Line 1485  while (!done)
1485          int ch = need_char & 255;          int ch = need_char & 255;
1486          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1487            "" : " (caseless)";            "" : " (caseless)";
1488          if (isprint(ch))          if (PRINTHEX(ch))
1489            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1490          else          else
1491            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1159  while (!done) Line 1521  while (!done)
1521                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1522                    c = 2;                    c = 2;
1523                    }                    }
1524                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1525                    {                    {
1526                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1527                    c += 2;                    c += 2;
# Line 1218  while (!done) Line 1580  while (!done)
1580                  strerror(errno));                  strerror(errno));
1581                }                }
1582              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1583    
1584              }              }
1585            }            }
1586          fclose(f);          fclose(f);
1587          }          }
1588    
1589          new_free(re);
1590          if (extra != NULL) new_free(extra);
1591          if (tables != NULL) new_free((void *)tables);
1592        continue;  /* With next regex */        continue;  /* With next regex */
1593        }        }
1594      }        /* End of non-POSIX compile */      }        /* End of non-POSIX compile */
# Line 1230  while (!done) Line 1597  while (!done)
1597    
1598    for (;;)    for (;;)
1599      {      {
1600      unsigned char *q;      uschar *q;
1601      unsigned char *bptr = dbuffer;      uschar *bptr;
1602      int *use_offsets = offsets;      int *use_offsets = offsets;
1603      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1604      int callout_data = 0;      int callout_data = 0;
# Line 1244  while (!done) Line 1611  while (!done)
1611      int gmatched = 0;      int gmatched = 0;
1612      int start_offset = 0;      int start_offset = 0;
1613      int g_notempty = 0;      int g_notempty = 0;
1614        int use_dfa = 0;
1615    
1616      options = 0;      options = 0;
1617    
1618        *copynames = 0;
1619        *getnames = 0;
1620    
1621        copynamesptr = copynames;
1622        getnamesptr = getnames;
1623    
1624      pcre_callout = callout;      pcre_callout = callout;
1625      first_callout = 1;      first_callout = 1;
1626      callout_extra = 0;      callout_extra = 0;
# Line 1255  while (!done) Line 1629  while (!done)
1629      callout_fail_id = -1;      callout_fail_id = -1;
1630      show_malloc = 0;      show_malloc = 0;
1631    
1632      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1633      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1634    
1635        len = 0;
1636        for (;;)
1637        {        {
1638        done = 1;        if (infile == stdin) printf("data> ");
1639        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1640            {
1641            if (len > 0) break;
1642            done = 1;
1643            goto CONTINUE;
1644            }
1645          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1646          len = (int)strlen((char *)buffer);
1647          if (buffer[len-1] == '\n') break;
1648        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1649    
     len = (int)strlen((char *)buffer);  
1650      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1651      buffer[len] = 0;      buffer[len] = 0;
1652      if (len == 0) break;      if (len == 0) break;
# Line 1271  while (!done) Line 1654  while (!done)
1654      p = buffer;      p = buffer;
1655      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1656    
1657      q = dbuffer;      bptr = q = dbuffer;
1658      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1659        {        {
1660        int i = 0;        int i = 0;
# Line 1293  while (!done) Line 1676  while (!done)
1676          c -= '0';          c -= '0';
1677          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1678            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1679    
1680    #if !defined NOUTF8
1681            if (use_utf8 && c > 255)
1682              {
1683              unsigned char buff8[8];
1684              int ii, utn;
1685              utn = ord2utf8(c, buff8);
1686              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1687              c = buff8[ii];   /* Last byte */
1688              }
1689    #endif
1690          break;          break;
1691    
1692          case 'x':          case 'x':
1693    
1694          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1695    
1696    #if !defined NOUTF8
1697          if (*p == '{')          if (*p == '{')
1698            {            {
1699            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1317  while (!done) Line 1712  while (!done)
1712              }              }
1713            /* Not correct form; fall through */            /* Not correct form; fall through */
1714            }            }
1715    #endif
1716    
1717          /* Ordinary \x */          /* Ordinary \x */
1718    
# Line 1352  while (!done) Line 1748  while (!done)
1748            }            }
1749          else if (isalnum(*p))          else if (isalnum(*p))
1750            {            {
1751            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1752            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1753              *npp++ = 0;
1754            *npp = 0;            *npp = 0;
1755            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1756            if (n < 0)            if (n < 0)
1757              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1758            else copystrings |= 1 << n;            copynamesptr = npp;
1759            }            }
1760          else if (*p == '+')          else if (*p == '+')
1761            {            {
# Line 1397  while (!done) Line 1793  while (!done)
1793            }            }
1794          continue;          continue;
1795    
1796    #if !defined NODFA
1797            case 'D':
1798    #if !defined NOPOSIX
1799            if (posix || do_posix)
1800              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1801            else
1802    #endif
1803              use_dfa = 1;
1804            continue;
1805    
1806            case 'F':
1807            options |= PCRE_DFA_SHORTEST;
1808            continue;
1809    #endif
1810    
1811          case 'G':          case 'G':
1812          if (isdigit(*p))          if (isdigit(*p))
1813            {            {
# Line 1405  while (!done) Line 1816  while (!done)
1816            }            }
1817          else if (isalnum(*p))          else if (isalnum(*p))
1818            {            {
1819            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1820            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1821              *npp++ = 0;
1822            *npp = 0;            *npp = 0;
1823            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1824            if (n < 0)            if (n < 0)
1825              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1826            else getstrings |= 1 << n;            getnamesptr = npp;
1827            }            }
1828          continue;          continue;
1829    
# Line 1438  while (!done) Line 1849  while (!done)
1849            if (offsets == NULL)            if (offsets == NULL)
1850              {              {
1851              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1852                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1853              return 1;              yield = 1;
1854                goto EXIT;
1855              }              }
1856            }            }
1857          use_size_offsets = n;          use_size_offsets = n;
# Line 1450  while (!done) Line 1862  while (!done)
1862          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1863          continue;          continue;
1864    
1865            case 'Q':
1866            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1867            if (extra == NULL)
1868              {
1869              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1870              extra->flags = 0;
1871              }
1872            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1873            extra->match_limit_recursion = n;
1874            continue;
1875    
1876            case 'q':
1877            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1878            if (extra == NULL)
1879              {
1880              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1881              extra->flags = 0;
1882              }
1883            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1884            extra->match_limit = n;
1885            continue;
1886    
1887    #if !defined NODFA
1888            case 'R':
1889            options |= PCRE_DFA_RESTART;
1890            continue;
1891    #endif
1892    
1893          case 'S':          case 'S':
1894          show_malloc = 1;          show_malloc = 1;
1895          continue;          continue;
# Line 1461  while (!done) Line 1901  while (!done)
1901          case '?':          case '?':
1902          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
1903          continue;          continue;
1904    
1905            case '<':
1906              {
1907              int x = check_newline(p, outfile);
1908              if (x == 0) goto NEXT_DATA;
1909              options |= x;
1910              while (*p++ != '>');
1911              }
1912            continue;
1913          }          }
1914        *q++ = c;        *q++ = c;
1915        }        }
1916      *q = 0;      *q = 0;
1917      len = q - dbuffer;      len = q - dbuffer;
1918    
1919        if ((all_use_dfa || use_dfa) && find_match_limit)
1920          {
1921          printf("**Match limit not relevant for DFA matching: ignored\n");
1922          find_match_limit = 0;
1923          }
1924    
1925      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1926      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1927    
# Line 1485  while (!done) Line 1940  while (!done)
1940    
1941        if (rc != 0)        if (rc != 0)
1942          {          {
1943          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1944          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1945          }          }
1946          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1947                  != 0)
1948            {
1949            fprintf(outfile, "Matched with REG_NOSUB\n");
1950            }
1951        else        else
1952          {          {
1953          size_t i;          size_t i;
# Line 1519  while (!done) Line 1979  while (!done)
1979    
1980      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1981        {        {
1982        if (timeit)        if (timeitm > 0)
1983          {          {
1984          register int i;          register int i;
1985          clock_t time_taken;          clock_t time_taken;
1986          clock_t start_time = clock();          clock_t start_time = clock();
1987          for (i = 0; i < LOOPREPEAT; i++)  
1988    #if !defined NODFA
1989            if (all_use_dfa || use_dfa)
1990              {
1991              int workspace[1000];
1992              for (i = 0; i < timeitm; i++)
1993                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1994                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1995                  sizeof(workspace)/sizeof(int));
1996              }
1997            else
1998    #endif
1999    
2000            for (i = 0; i < timeitm; i++)
2001            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2002              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2003    
2004          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2005          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2006            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2007              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2008          }          }
2009    
2010        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2011        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2012          for the recursion limit. */
2013    
2014        if (find_match_limit)        if (find_match_limit)
2015          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2016          if (extra == NULL)          if (extra == NULL)
2017            {            {
2018            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2019            extra->flags = 0;            extra->flags = 0;
2020            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
2021    
2022          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
2023            {            options|g_notempty, use_offsets, use_size_offsets,
2024            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2025            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
2026              options | g_notempty, use_offsets, use_size_offsets);  
2027            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
2028              {            options|g_notempty, use_offsets, use_size_offsets,
2029              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2030              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
2031          }          }
2032    
2033        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1597  while (!done) Line 2049  while (!done)
2049        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2050        value of match_limit. */        value of match_limit. */
2051    
2052        else  #if !defined NODFA
2053          else if (all_use_dfa || use_dfa)
2054          {          {
2055          count = pcre_exec(re, extra, (char *)bptr, len,          int workspace[1000];
2056            start_offset, options | g_notempty, use_offsets, use_size_offsets);          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2057              options | g_notempty, use_offsets, use_size_offsets, workspace,
2058              sizeof(workspace)/sizeof(int));
2059            if (count == 0)
2060              {
2061              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2062              count = use_size_offsets/2;
2063              }
2064          }          }
2065    #endif
2066    
2067        if (count == 0)        else
2068          {          {
2069          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2070          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2071            if (count == 0)
2072              {
2073              fprintf(outfile, "Matched, but too many substrings\n");
2074              count = use_size_offsets/3;
2075              }
2076          }          }
2077    
2078        /* Matched */        /* Matched */
2079    
2080        if (count >= 0)        if (count >= 0)
2081          {          {
2082          int i;          int i, maxcount;
2083    
2084    #if !defined NODFA
2085            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2086    #endif
2087              maxcount = use_size_offsets/3;
2088    
2089            /* This is a check against a lunatic return value. */
2090    
2091            if (count > maxcount)
2092              {
2093              fprintf(outfile,
2094                "** PCRE error: returned count %d is too big for offset size %d\n",
2095                count, use_size_offsets);
2096              count = use_size_offsets/3;
2097              if (do_g || do_G)
2098                {
2099                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2100                do_g = do_G = FALSE;        /* Break g/G loop */
2101                }
2102              }
2103    
2104          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2105            {            {
2106            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1641  while (!done) Line 2128  while (!done)
2128            {            {
2129            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2130              {              {
2131              char copybuffer[16];              char copybuffer[256];
2132              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2133                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2134              if (rc < 0)              if (rc < 0)
# Line 1651  while (!done) Line 2138  while (!done)
2138              }              }
2139            }            }
2140    
2141            for (copynamesptr = copynames;
2142                 *copynamesptr != 0;
2143                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2144              {
2145              char copybuffer[256];
2146              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2147                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2148              if (rc < 0)
2149                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2150              else
2151                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2152              }
2153    
2154          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2155            {            {
2156            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1663  while (!done) Line 2163  while (!done)
2163              else              else
2164                {                {
2165                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2166                pcre_free_substring(substring);                pcre_free_substring(substring);
2167                }                }
2168              }              }
2169            }            }
2170    
2171            for (getnamesptr = getnames;
2172                 *getnamesptr != 0;
2173                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2174              {
2175              const char *substring;
2176              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2177                count, (char *)getnamesptr, &substring);
2178              if (rc < 0)
2179                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2180              else
2181                {
2182                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2183                pcre_free_substring(substring);
2184                }
2185              }
2186    
2187          if (getlist)          if (getlist)
2188            {            {
2189            const char **stringlist;            const char **stringlist;
# Line 1692  while (!done) Line 2207  while (!done)
2207    
2208        else if (count == PCRE_ERROR_PARTIAL)        else if (count == PCRE_ERROR_PARTIAL)
2209          {          {
2210          fprintf(outfile, "Partial match\n");          fprintf(outfile, "Partial match");
2211    #if !defined NODFA
2212            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2213              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2214                bptr + use_offsets[0]);
2215    #endif
2216            fprintf(outfile, "\n");
2217          break;  /* Out of the /g loop */          break;  /* Out of the /g loop */
2218          }          }
2219    
2220        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2221        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2222        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2223        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2224        offset values to achieve this. We won't be at the end of the string -  
2225        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2226          "anycrlf". If the previous match was at the end of a line terminated by
2227          CRLF, an advance of one character just passes the \r, whereas we should
2228          prefer the longer newline sequence, as does the code in pcre_exec().
2229          Fudge the offset value to achieve this.
2230    
2231          Otherwise, in the case of UTF-8 matching, the advance must be one
2232          character, not one byte. */
2233    
2234        else        else
2235          {          {
2236          if (g_notempty != 0)          if (g_notempty != 0)
2237            {            {
2238            int onechar = 1;            int onechar = 1;
2239              unsigned int obits = ((real_pcre *)re)->options;
2240            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2241            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2242                {
2243                int d;
2244                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2245                obits = (d == '\r')? PCRE_NEWLINE_CR :
2246                        (d == '\n')? PCRE_NEWLINE_LF :
2247                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2248                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2249                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2250                }
2251              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2252                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2253                  &&
2254                  start_offset < len - 1 &&
2255                  bptr[start_offset] == '\r' &&
2256                  bptr[start_offset+1] == '\n')
2257                onechar++;
2258              else if (use_utf8)
2259              {              {
2260              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2261                {                {
# Line 1744  while (!done) Line 2290  while (!done)
2290        character. */        character. */
2291    
2292        g_notempty = 0;        g_notempty = 0;
2293    
2294        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2295          {          {
2296          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1762  while (!done) Line 2309  while (!done)
2309          len -= use_offsets[1];          len -= use_offsets[1];
2310          }          }
2311        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2312    
2313        NEXT_DATA: continue;
2314      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2315    
2316    CONTINUE:    CONTINUE:
# Line 1770  while (!done) Line 2319  while (!done)
2319    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2320  #endif  #endif
2321    
2322    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2323    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2324    if (tables != NULL)    if (tables != NULL)
2325      {      {
2326      free((void *)tables);      new_free((void *)tables);
2327      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2328        locale_set = 0;
2329      }      }
2330    }    }
2331    
2332  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2333  return 0;  
2334    EXIT:
2335    
2336    if (infile != NULL && infile != stdin) fclose(infile);
2337    if (outfile != NULL && outfile != stdout) fclose(outfile);
2338    
2339    free(buffer);
2340    free(dbuffer);
2341    free(pbuffer);
2342    free(offsets);
2343    
2344    return yield;
2345  }  }
2346    
2347  /* End */  /* End of pcretest.c */

Legend:
Removed from v.75  
changed lines
  Added in v.169

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12