/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 230 by ph10, Mon Sep 10 13:23:56 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
51    
52  /* We include pcre_internal.h because we need the internal info for displaying  /* A number of things vary for Windows builds. Originally, pcretest opened its
53  the results of pcre_study() and we also need to know about the internal  input and output without "b"; then I was told that "b" was needed in some
54  macros, structures, and other internal data values; pcretest has "inside  environments, so it was added for release 5.0 to both the input and output. (It
55  information" compared to a program that strictly follows the PCRE API. */  makes no difference on Unix-like systems.) Later I was told that it is wrong
56    for the input on Windows. I've now abstracted the modes into two macros that
57    are set here, to make it easier to fiddle with them, and removed "b" from the
58    input mode under Windows. */
59    
60    #if defined(_WIN32) || defined(WIN32)
61    #include <io.h>                /* For _setmode() */
62    #include <fcntl.h>             /* For _O_BINARY */
63    #define INPUT_MODE   "r"
64    #define OUTPUT_MODE  "wb"
65    
66    #else
67    #include <sys/time.h>          /* These two includes are needed */
68    #include <sys/resource.h>      /* for setrlimit(). */
69    #define INPUT_MODE   "rb"
70    #define OUTPUT_MODE  "wb"
71    #endif
72    
73    
74    /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76    internal macros, structures, and other internal data values; pcretest has
77    "inside information" compared to a program that strictly follows the PCRE API.
78    
79    Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
86  /* We need access to the data tables that PCRE uses. So as not to have to keep  /* We need access to the data tables that PCRE uses. So as not to have to keep
# Line 70  symbols to prevent clashes. */ Line 100  symbols to prevent clashes. */
100    
101  /* We also need the pcre_printint() function for printing out compiled  /* We also need the pcre_printint() function for printing out compiled
102  patterns. This function is in a separate file so that it can be included in  patterns. This function is in a separate file so that it can be included in
103  pcre_compile.c when that module is compiled with debugging enabled. */  pcre_compile.c when that module is compiled with debugging enabled.
104    
105    The definition of the macro PRINTABLE, which determines whether to print an
106    output character as-is or as a hex value when showing compiled patterns, is
107    contained in this file. We use it here also, in cases when the locale has not
108    been explicitly changed, so as to get consistent output from systems that
109    differ in their output from isprint() even in the "C" locale. */
110    
111  #include "pcre_printint.src"  #include "pcre_printint.src"
112    
113    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114    
115    
116  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
117  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 83  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
125  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
127  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137  /* Other parameters */  /* Other parameters */
# Line 99  function (define NOINFOCHECK). */ Line 144  function (define NOINFOCHECK). */
144  #endif  #endif
145  #endif  #endif
146    
147  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
   
 #define BUFFER_SIZE 30000  
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
148    
149    #define LOOPREPEAT 500000
150    
151  /* Static variables */  /* Static variables */
152    
# Line 114  static int callout_count; Line 156  static int callout_count;
156  static int callout_extra;  static int callout_extra;
157  static int callout_fail_count;  static int callout_fail_count;
158  static int callout_fail_id;  static int callout_fail_id;
159    static int debug_lengths;
160  static int first_callout;  static int first_callout;
161    static int locale_set = 0;
162  static int show_malloc;  static int show_malloc;
163  static int use_utf8;  static int use_utf8;
164  static size_t gotten_store;  static size_t gotten_store;
165    
166    /* The buffers grow automatically if very long input lines are encountered. */
167    
168    static int buffer_size = 50000;
169    static uschar *buffer = NULL;
170    static uschar *dbuffer = NULL;
171  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
172    
173    
174    
175  /*************************************************  /*************************************************
176    *        Read or extend an input line            *
177    *************************************************/
178    
179    /* Input lines are read into buffer, but both patterns and data lines can be
180    continued over multiple input lines. In addition, if the buffer fills up, we
181    want to automatically expand it so as to be able to handle extremely large
182    lines that are needed for certain stress tests. When the input buffer is
183    expanded, the other two buffers must also be expanded likewise, and the
184    contents of pbuffer, which are a copy of the input for callouts, must be
185    preserved (for when expansion happens for a data line). This is not the most
186    optimal way of handling this, but hey, this is just a test program!
187    
188    Arguments:
189      f            the file to read
190      start        where in buffer to start (this *must* be within buffer)
191    
192    Returns:       pointer to the start of new data
193                   could be a copy of start, or could be moved
194                   NULL if no data read and EOF reached
195    */
196    
197    static uschar *
198    extend_inputline(FILE *f, uschar *start)
199    {
200    uschar *here = start;
201    
202    for (;;)
203      {
204      int rlen = buffer_size - (here - buffer);
205    
206      if (rlen > 1000)
207        {
208        int dlen;
209        if (fgets((char *)here, rlen,  f) == NULL)
210          return (here == start)? NULL : start;
211        dlen = (int)strlen((char *)here);
212        if (dlen > 0 && here[dlen - 1] == '\n') return start;
213        here += dlen;
214        }
215    
216      else
217        {
218        int new_buffer_size = 2*buffer_size;
219        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222    
223        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224          {
225          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226          exit(1);
227          }
228    
229        memcpy(new_buffer, buffer, buffer_size);
230        memcpy(new_pbuffer, pbuffer, buffer_size);
231    
232        buffer_size = new_buffer_size;
233    
234        start = new_buffer + (start - buffer);
235        here = new_buffer + (here - buffer);
236    
237        free(buffer);
238        free(dbuffer);
239        free(pbuffer);
240    
241        buffer = new_buffer;
242        dbuffer = new_dbuffer;
243        pbuffer = new_pbuffer;
244        }
245      }
246    
247    return NULL;  /* Control never gets here */
248    }
249    
250    
251    
252    
253    
254    
255    
256    /*************************************************
257  *          Read number from string               *  *          Read number from string               *
258  *************************************************/  *************************************************/
259    
260  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
262  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
263    
264  Arguments:  Arguments:
265    str           string to be converted    str           string to be converted
# Line 159  return(result); Line 289  return(result);
289  and returns the value of the character.  and returns the value of the character.
290    
291  Argument:  Argument:
292    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
293    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
294    
295  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
296             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
297  */  */
298    
299  #if !defined NOUTF8  #if !defined NOUTF8
300    
301  static int  static int
302  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
303  {  {
304  int c = *buffer++;  int c = *utf8bytes++;
305  int d = c;  int d = c;
306  int i, j, s;  int i, j, s;
307    
# Line 191  d = (c & utf8_table3[i]) << s; Line 321  d = (c & utf8_table3[i]) << s;
321    
322  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
323    {    {
324    c = *buffer++;    c = *utf8bytes++;
325    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
326    s -= 6;    s -= 6;
327    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 222  and encodes it as a UTF-8 character in 0 Line 352  and encodes it as a UTF-8 character in 0
352    
353  Arguments:  Arguments:
354    cvalue     the character value    cvalue     the character value
355    buffer     pointer to buffer for result - at least 6 bytes long    utf8bytes  pointer to buffer for result - at least 6 bytes long
356    
357  Returns:     number of characters placed in the buffer  Returns:     number of characters placed in the buffer
358  */  */
359    
360    #if !defined NOUTF8
361    
362  static int  static int
363  ord2utf8(int cvalue, uschar *buffer)  ord2utf8(int cvalue, uschar *utf8bytes)
364  {  {
365  register int i, j;  register int i, j;
366  for (i = 0; i < utf8_table1_size; i++)  for (i = 0; i < utf8_table1_size; i++)
367    if (cvalue <= utf8_table1[i]) break;    if (cvalue <= utf8_table1[i]) break;
368  buffer += i;  utf8bytes += i;
369  for (j = i; j > 0; j--)  for (j = i; j > 0; j--)
370   {   {
371   *buffer-- = 0x80 | (cvalue & 0x3f);   *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372   cvalue >>= 6;   cvalue >>= 6;
373   }   }
374  *buffer = utf8_table2[i] | cvalue;  *utf8bytes = utf8_table2[i] | cvalue;
375  return i + 1;  return i + 1;
376  }  }
377    
378    #endif
379    
380    
381    
382  /*************************************************  /*************************************************
# Line 269  while (length-- > 0) Line 403  while (length-- > 0)
403        {        {
404        length -= rc - 1;        length -= rc - 1;
405        p += rc;        p += rc;
406        if (c < 256 && isprint(c))        if (PRINTHEX(c))
407          {          {
408          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
409          yield++;          yield++;
410          }          }
411        else        else
412          {          {
413          int n;          int n = 4;
414          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
415          yield += n;          yield += (n <= 0x000000ff)? 2 :
416                     (n <= 0x00000fff)? 3 :
417                     (n <= 0x0000ffff)? 4 :
418                     (n <= 0x000fffff)? 5 : 6;
419          }          }
420        continue;        continue;
421        }        }
# Line 287  while (length-- > 0) Line 424  while (length-- > 0)
424    
425     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
426    
427    if (isprint(c = *(p++)))    c = *p++;
428      if (PRINTHEX(c))
429      {      {
430      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
431      yield++;      yield++;
# Line 461  if ((rc = pcre_fullinfo(re, study, optio Line 599  if ((rc = pcre_fullinfo(re, study, optio
599  *         Byte flipping function                 *  *         Byte flipping function                 *
600  *************************************************/  *************************************************/
601    
602  static long int  static unsigned long int
603  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
604  {  {
605  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
606  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 475  return ((value & 0x000000ff) << 24) | Line 613  return ((value & 0x000000ff) << 24) |
613    
614    
615  /*************************************************  /*************************************************
616    *        Check match or recursion limit          *
617    *************************************************/
618    
619    static int
620    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621      int start_offset, int options, int *use_offsets, int use_size_offsets,
622      int flag, unsigned long int *limit, int errnumber, const char *msg)
623    {
624    int count;
625    int min = 0;
626    int mid = 64;
627    int max = -1;
628    
629    extra->flags |= flag;
630    
631    for (;;)
632      {
633      *limit = mid;
634    
635      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636        use_offsets, use_size_offsets);
637    
638      if (count == errnumber)
639        {
640        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641        min = mid;
642        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643        }
644    
645      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646                             count == PCRE_ERROR_PARTIAL)
647        {
648        if (mid == min + 1)
649          {
650          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651          break;
652          }
653        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654        max = mid;
655        mid = (min + mid)/2;
656        }
657      else break;    /* Some other error */
658      }
659    
660    extra->flags &= ~flag;
661    return count;
662    }
663    
664    
665    
666    /*************************************************
667    *         Case-independent strncmp() function    *
668    *************************************************/
669    
670    /*
671    Arguments:
672      s         first string
673      t         second string
674      n         number of characters to compare
675    
676    Returns:    < 0, = 0, or > 0, according to the comparison
677    */
678    
679    static int
680    strncmpic(uschar *s, uschar *t, int n)
681    {
682    while (n--)
683      {
684      int c = tolower(*s++) - tolower(*t++);
685      if (c) return c;
686      }
687    return 0;
688    }
689    
690    
691    
692    /*************************************************
693    *         Check newline indicator                *
694    *************************************************/
695    
696    /* This is used both at compile and run-time to check for <xxx> escapes, where
697    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698    no match.
699    
700    Arguments:
701      p           points after the leading '<'
702      f           file for error message
703    
704    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
705    */
706    
707    static int
708    check_newline(uschar *p, FILE *f)
709    {
710    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715    fprintf(f, "Unknown newline type at: <%s\n", p);
716    return 0;
717    }
718    
719    
720    
721    /*************************************************
722    *             Usage function                     *
723    *************************************************/
724    
/* Write the command-line help text to stdout. The lines for -dfa and -p are
left out when NODFA or NOPOSIX, respectively, is defined. */

static void
usage(void)
{
static const char *const help_lines[] = {
  "Usage:     pcretest [options] [<input> [<output>]]\n",
  "  -b       show compiled code (bytecode)\n",
  "  -C       show PCRE compile-time options and exit\n",
  "  -d       debug: show compiled code and information (-b and -i)\n",
#if !defined NODFA
  "  -dfa     force DFA matching for all subjects\n",
#endif
  "  -help    show usage information\n",
  "  -i       show information about compiled patterns\n",
  "  -m       output memory used information\n",
  "  -o <n>   set size of offsets vector to <n>\n",
#if !defined NOPOSIX
  "  -p       use POSIX interface\n",
#endif
  "  -q       quiet: do not output PCRE version number at start\n",
  "  -S <n>   set stack size to <n> megabytes\n",
  "  -s       output store (memory) used information\n",
  "  -t       time compilation and execution\n",
  "  -t <n>   time compilation and execution, repeating <n> times\n",
  "  -tm      time execution (matching) only\n",
  "  -tm <n>  time execution (matching) only, repeating <n> times\n"
  };
unsigned int i;

for (i = 0; i < sizeof(help_lines)/sizeof(help_lines[0]); i++)
  fputs(help_lines[i], stdout);
}
750    
751    
752    
753    /*************************************************
754  *                Main Program                    *  *                Main Program                    *
755  *************************************************/  *************************************************/
756    
# Line 489  int options = 0; Line 765  int options = 0;
765  int study_options = 0;  int study_options = 0;
766  int op = 1;  int op = 1;
767  int timeit = 0;  int timeit = 0;
768    int timeitm = 0;
769  int showinfo = 0;  int showinfo = 0;
770  int showstore = 0;  int showstore = 0;
771    int quiet = 0;
772  int size_offsets = 45;  int size_offsets = 45;
773  int size_offsets_max;  int size_offsets_max;
774  int *offsets = NULL;  int *offsets = NULL;
# Line 501  int debug = 0; Line 779  int debug = 0;
779  int done = 0;  int done = 0;
780  int all_use_dfa = 0;  int all_use_dfa = 0;
781  int yield = 0;  int yield = 0;
782    int stack_size;
783    
784  unsigned char *buffer;  /* These vectors store, end-to-end, a list of captured substring names. Assume
785  unsigned char *dbuffer;  that 1024 is plenty long enough for the few names we'll be testing. */
786    
787    uschar copynames[1024];
788    uschar getnames[1024];
789    
790    uschar *copynamesptr;
791    uschar *getnamesptr;
792    
793  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
794  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
795    
796  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
797  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
798  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
799    
800  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
801    
802  outfile = stdout;  outfile = stdout;
803    
804    /* The following  _setmode() stuff is some Windows magic that tells its runtime
805    library to translate CRLF into a single LF character. At least, that's what
806    I've been told: never having used Windows I take this all on trust. Originally
807    it set 0x8000, but then I was advised that _O_BINARY was better. */
808    
809    #if defined(_WIN32) || defined(WIN32)
810    _setmode( _fileno( stdout ), _O_BINARY );
811    #endif
812    
813  /* Scan options */  /* Scan options */
814    
815  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 530  while (argc > 1 && argv[op][0] == '-') Line 818  while (argc > 1 && argv[op][0] == '-')
818    
819    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
820      showstore = 1;      showstore = 1;
821    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
822      else if (strcmp(argv[op], "-b") == 0) debug = 1;
823    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
824    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
825  #if !defined NODFA  #if !defined NODFA
# Line 543  while (argc > 1 && argv[op][0] == '-') Line 832  while (argc > 1 && argv[op][0] == '-')
832      op++;      op++;
833      argc--;      argc--;
834      }      }
835      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
836        {
837        int both = argv[op][2] == 0;
838        int temp;
839        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
840                         *endptr == 0))
841          {
842          timeitm = temp;
843          op++;
844          argc--;
845          }
846        else timeitm = LOOPREPEAT;
847        if (both) timeit = timeitm;
848        }
849      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
850          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
851            *endptr == 0))
852        {
853    #if defined(_WIN32) || defined(WIN32)
854        printf("PCRE: -S not supported on this OS\n");
855        exit(1);
856    #else
857        int rc;
858        struct rlimit rlim;
859        getrlimit(RLIMIT_STACK, &rlim);
860        rlim.rlim_cur = stack_size * 1024 * 1024;
861        rc = setrlimit(RLIMIT_STACK, &rlim);
862        if (rc != 0)
863          {
864        printf("PCRE: setrlimit() failed with error %d\n", rc);
865        exit(1);
866          }
867        op++;
868        argc--;
869    #endif
870        }
871  #if !defined NOPOSIX  #if !defined NOPOSIX
872    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
873  #endif  #endif
# Line 556  while (argc > 1 && argv[op][0] == '-') Line 881  while (argc > 1 && argv[op][0] == '-')
881      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
882      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
883      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
885          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886          (rc == -2)? "ANYCRLF" :
887          (rc == -1)? "ANY" : "???");
888      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
890      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
891      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
892      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
893      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
894        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
895        printf("  Default recursion depth limit = %d\n", rc);
896      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
897      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
898      exit(0);      goto EXIT;
899        }
900      else if (strcmp(argv[op], "-help") == 0 ||
901               strcmp(argv[op], "--help") == 0)
902        {
903        usage();
904        goto EXIT;
905      }      }
906    else    else
907      {      {
908      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
909      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
910      yield = 1;      yield = 1;
911      goto EXIT;      goto EXIT;
912      }      }
# Line 598  offsets = (int *)malloc(size_offsets_max Line 921  offsets = (int *)malloc(size_offsets_max
921  if (offsets == NULL)  if (offsets == NULL)
922    {    {
923    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
924      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
925    yield = 1;    yield = 1;
926    goto EXIT;    goto EXIT;
927    }    }
# Line 607  if (offsets == NULL) Line 930  if (offsets == NULL)
930    
931  if (argc > 1)  if (argc > 1)
932    {    {
933    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
934    if (infile == NULL)    if (infile == NULL)
935      {      {
936      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 618  if (argc > 1) Line 941  if (argc > 1)
941    
942  if (argc > 2)  if (argc > 2)
943    {    {
944    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
945    if (outfile == NULL)    if (outfile == NULL)
946      {      {
947      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 634  pcre_free = new_free; Line 957  pcre_free = new_free;
957  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
958  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
959    
960  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
961    
962  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
963    
964  /* Main loop */  /* Main loop */
965    
# Line 663  while (!done) Line 986  while (!done)
986    int do_showinfo = showinfo;    int do_showinfo = showinfo;
987    int do_showrest = 0;    int do_showrest = 0;
988    int do_flip = 0;    int do_flip = 0;
989    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
990    
991    use_utf8 = 0;    use_utf8 = 0;
992      debug_lengths = 1;
993    
994    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
995    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
996    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
997    fflush(outfile);    fflush(outfile);
998    
# Line 680  while (!done) Line 1004  while (!done)
1004    
1005    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1006      {      {
1007      unsigned long int magic;      unsigned long int magic, get_options;
1008      uschar sbuf[8];      uschar sbuf[8];
1009      FILE *f;      FILE *f;
1010    
# Line 728  while (!done) Line 1052  while (!done)
1052    
1053      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1054    
1055      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1056      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1057    
1058      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1059    
# Line 773  while (!done) Line 1097  while (!done)
1097      }      }
1098    
1099    pp = p;    pp = p;
1100      poffset = p - buffer;
1101    
1102    for(;;)    for(;;)
1103      {      {
# Line 783  while (!done) Line 1108  while (!done)
1108        pp++;        pp++;
1109        }        }
1110      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1111      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1112      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1113        {        {
1114        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1115        done = 1;        done = 1;
# Line 801  while (!done) Line 1118  while (!done)
1118      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1119      }      }
1120    
1121      /* The buffer may have moved while being extended; reset the start of data
1122      pointer to the correct relative point in the buffer. */
1123    
1124      p = buffer + poffset;
1125    
1126    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1127    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1128    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 832  while (!done) Line 1154  while (!done)
1154    
1155        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1156        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1157          case 'B': do_debug = 1; break;
1158        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1159        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1160        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1161        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1162        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1163        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1164          case 'J': options |= PCRE_DUPNAMES; break;
1165        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1166        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1167    
# Line 848  while (!done) Line 1172  while (!done)
1172        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1173        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1174        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1175          case 'Z': debug_lengths = 0; break;
1176        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1177        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1178    
1179        case 'L':        case 'L':
1180        ppp = pp;        ppp = pp;
1181        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1182        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The '0' test is just in case this is an unterminated line. */
1183          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1184        *ppp = 0;        *ppp = 0;
1185        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1186          {          {
1187          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1188          goto SKIP_DATA;          goto SKIP_DATA;
1189          }          }
1190          locale_set = 1;
1191        tables = pcre_maketables();        tables = pcre_maketables();
1192        pp = ppp;        pp = ppp;
1193        break;        break;
# Line 872  while (!done) Line 1199  while (!done)
1199        *pp = 0;        *pp = 0;
1200        break;        break;
1201    
1202          case '<':
1203            {
1204            int x = check_newline(pp, outfile);
1205            if (x == 0) goto SKIP_DATA;
1206            options |= x;
1207            while (*pp++ != '>');
1208            }
1209          break;
1210    
1211        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1212        case '\n':        case '\n':
1213        case ' ':        case ' ':
# Line 896  while (!done) Line 1232  while (!done)
1232      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1233      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1234      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1235        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1236        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1237    
1238      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1239    
1240      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 903  while (!done) Line 1242  while (!done)
1242    
1243      if (rc != 0)      if (rc != 0)
1244        {        {
1245        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1246        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1247        goto SKIP_DATA;        goto SKIP_DATA;
1248        }        }
# Line 915  while (!done) Line 1254  while (!done)
1254  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1255    
1256      {      {
1257      if (timeit)      if (timeit > 0)
1258        {        {
1259        register int i;        register int i;
1260        clock_t time_taken;        clock_t time_taken;
1261        clock_t start_time = clock();        clock_t start_time = clock();
1262        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1263          {          {
1264          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1265          if (re != NULL) free(re);          if (re != NULL) free(re);
1266          }          }
1267        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1268        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1269          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1270            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1271        }        }
1272    
# Line 944  while (!done) Line 1283  while (!done)
1283          {          {
1284          for (;;)          for (;;)
1285            {            {
1286            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1287              {              {
1288              done = 1;              done = 1;
1289              goto CONTINUE;              goto CONTINUE;
# Line 979  while (!done) Line 1318  while (!done)
1318    
1319      if (do_study)      if (do_study)
1320        {        {
1321        if (timeit)        if (timeit > 0)
1322          {          {
1323          register int i;          register int i;
1324          clock_t time_taken;          clock_t time_taken;
1325          clock_t start_time = clock();          clock_t start_time = clock();
1326          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1327            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1328          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1329          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1330          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1331            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1332              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1333          }          }
1334        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 1010  while (!done) Line 1349  while (!done)
1349        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));        rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1350        rre->size = byteflip(rre->size, sizeof(rre->size));        rre->size = byteflip(rre->size, sizeof(rre->size));
1351        rre->options = byteflip(rre->options, sizeof(rre->options));        rre->options = byteflip(rre->options, sizeof(rre->options));
1352          rre->flags = byteflip(rre->flags, sizeof(rre->flags));
1353        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));        rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1354        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));        rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1355        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));        rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
# Line 1032  while (!done) Line 1372  while (!done)
1372    
1373      SHOW_INFO:      SHOW_INFO:
1374    
1375        if (do_debug)
1376          {
1377          fprintf(outfile, "------------------------------------------------------------------\n");
1378          pcre_printint(re, outfile, debug_lengths);
1379          }
1380    
1381      if (do_showinfo)      if (do_showinfo)
1382        {        {
1383        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1384  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1385        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1386  #endif  #endif
1387        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged,
1388            hascrorlf;
1389        int nameentrysize, namecount;        int nameentrysize, namecount;
1390        const uschar *nametable;        const uschar *nametable;
1391    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         pcre_printint(re, outfile);  
         }  
   
1392        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1393        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1394        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1057  while (!done) Line 1398  while (!done)
1398        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1399        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1400        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1401          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1402          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1403          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1404    
1405  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1406        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1098  while (!done) Line 1442  while (!done)
1442            }            }
1443          }          }
1444    
1445        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1446        to fish it out via out back door */        if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1447    
1448        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1449        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1450    
1451        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1452          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1453            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1454            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1455            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1121  while (!done) Line 1459  while (!done)
1459            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1460            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1461            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1462              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1463            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1464            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1465              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1466    
1467          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1468    
1469          switch (get_options & PCRE_NEWLINE_BITS)
1470            {
1471            case PCRE_NEWLINE_CR:
1472            fprintf(outfile, "Forced newline sequence: CR\n");
1473            break;
1474    
1475            case PCRE_NEWLINE_LF:
1476            fprintf(outfile, "Forced newline sequence: LF\n");
1477            break;
1478    
1479        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_CRLF:
1480          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: CRLF\n");
1481            break;
1482    
1483            case PCRE_NEWLINE_ANYCRLF:
1484            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1485            break;
1486    
1487            case PCRE_NEWLINE_ANY:
1488            fprintf(outfile, "Forced newline sequence: ANY\n");
1489            break;
1490    
1491            default:
1492            break;
1493            }
1494    
1495        if (first_char == -1)        if (first_char == -1)
1496          {          {
1497          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1498          }          }
1499        else if (first_char < 0)        else if (first_char < 0)
1500          {          {
# Line 1140  while (!done) Line 1505  while (!done)
1505          int ch = first_char & 255;          int ch = first_char & 255;
1506          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1507            "" : " (caseless)";            "" : " (caseless)";
1508          if (isprint(ch))          if (PRINTHEX(ch))
1509            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1510          else          else
1511            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1155  while (!done) Line 1520  while (!done)
1520          int ch = need_char & 255;          int ch = need_char & 255;
1521          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1522            "" : " (caseless)";            "" : " (caseless)";
1523          if (isprint(ch))          if (PRINTHEX(ch))
1524            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1525          else          else
1526            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1191  while (!done) Line 1556  while (!done)
1556                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1557                    c = 2;                    c = 2;
1558                    }                    }
1559                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1560                    {                    {
1561                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1562                    c += 2;                    c += 2;
# Line 1250  while (!done) Line 1615  while (!done)
1615                  strerror(errno));                  strerror(errno));
1616                }                }
1617              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1618    
1619              }              }
1620            }            }
1621          fclose(f);          fclose(f);
# Line 1266  while (!done) Line 1632  while (!done)
1632    
1633    for (;;)    for (;;)
1634      {      {
1635      unsigned char *q;      uschar *q;
1636      unsigned char *bptr = dbuffer;      uschar *bptr;
1637      int *use_offsets = offsets;      int *use_offsets = offsets;
1638      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1639      int callout_data = 0;      int callout_data = 0;
# Line 1284  while (!done) Line 1650  while (!done)
1650    
1651      options = 0;      options = 0;
1652    
1653        *copynames = 0;
1654        *getnames = 0;
1655    
1656        copynamesptr = copynames;
1657        getnamesptr = getnames;
1658    
1659      pcre_callout = callout;      pcre_callout = callout;
1660      first_callout = 1;      first_callout = 1;
1661      callout_extra = 0;      callout_extra = 0;
# Line 1292  while (!done) Line 1664  while (!done)
1664      callout_fail_id = -1;      callout_fail_id = -1;
1665      show_malloc = 0;      show_malloc = 0;
1666    
1667      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1668      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1669    
1670        len = 0;
1671        for (;;)
1672        {        {
1673        done = 1;        if (infile == stdin) printf("data> ");
1674        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1675            {
1676            if (len > 0) break;
1677            done = 1;
1678            goto CONTINUE;
1679            }
1680          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1681          len = (int)strlen((char *)buffer);
1682          if (buffer[len-1] == '\n') break;
1683        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1684    
     len = (int)strlen((char *)buffer);  
1685      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1686      buffer[len] = 0;      buffer[len] = 0;
1687      if (len == 0) break;      if (len == 0) break;
# Line 1308  while (!done) Line 1689  while (!done)
1689      p = buffer;      p = buffer;
1690      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1691    
1692      q = dbuffer;      bptr = q = dbuffer;
1693      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1694        {        {
1695        int i = 0;        int i = 0;
# Line 1330  while (!done) Line 1711  while (!done)
1711          c -= '0';          c -= '0';
1712          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1713            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1714    
1715    #if !defined NOUTF8
1716            if (use_utf8 && c > 255)
1717              {
1718              unsigned char buff8[8];
1719              int ii, utn;
1720              utn = ord2utf8(c, buff8);
1721              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1722              c = buff8[ii];   /* Last byte */
1723              }
1724    #endif
1725          break;          break;
1726    
1727          case 'x':          case 'x':
# Line 1391  while (!done) Line 1783  while (!done)
1783            }            }
1784          else if (isalnum(*p))          else if (isalnum(*p))
1785            {            {
1786            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1787            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1788              *npp++ = 0;
1789            *npp = 0;            *npp = 0;
1790            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1791            if (n < 0)            if (n < 0)
1792              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1793            else copystrings |= 1 << n;            copynamesptr = npp;
1794            }            }
1795          else if (*p == '+')          else if (*p == '+')
1796            {            {
# Line 1459  while (!done) Line 1851  while (!done)
1851            }            }
1852          else if (isalnum(*p))          else if (isalnum(*p))
1853            {            {
1854            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1855            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1856              *npp++ = 0;
1857            *npp = 0;            *npp = 0;
1858            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1859            if (n < 0)            if (n < 0)
1860              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1861            else getstrings |= 1 << n;            getnamesptr = npp;
1862            }            }
1863          continue;          continue;
1864    
# Line 1492  while (!done) Line 1884  while (!done)
1884            if (offsets == NULL)            if (offsets == NULL)
1885              {              {
1886              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1887                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1888              yield = 1;              yield = 1;
1889              goto EXIT;              goto EXIT;
1890              }              }
# Line 1505  while (!done) Line 1897  while (!done)
1897          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1898          continue;          continue;
1899    
1900            case 'Q':
1901            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1902            if (extra == NULL)
1903              {
1904              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1905              extra->flags = 0;
1906              }
1907            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1908            extra->match_limit_recursion = n;
1909            continue;
1910    
1911            case 'q':
1912            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1913            if (extra == NULL)
1914              {
1915              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1916              extra->flags = 0;
1917              }
1918            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1919            extra->match_limit = n;
1920            continue;
1921    
1922  #if !defined NODFA  #if !defined NODFA
1923          case 'R':          case 'R':
1924          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
# Line 1522  while (!done) Line 1936  while (!done)
1936          case '?':          case '?':
1937          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
1938          continue;          continue;
1939    
1940            case '<':
1941              {
1942              int x = check_newline(p, outfile);
1943              if (x == 0) goto NEXT_DATA;
1944              options |= x;
1945              while (*p++ != '>');
1946              }
1947            continue;
1948          }          }
1949        *q++ = c;        *q++ = c;
1950        }        }
# Line 1552  while (!done) Line 1975  while (!done)
1975    
1976        if (rc != 0)        if (rc != 0)
1977          {          {
1978          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1979          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1980          }          }
1981          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1982                  != 0)
1983            {
1984            fprintf(outfile, "Matched with REG_NOSUB\n");
1985            }
1986        else        else
1987          {          {
1988          size_t i;          size_t i;
# Line 1586  while (!done) Line 2014  while (!done)
2014    
2015      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2016        {        {
2017        if (timeit)        if (timeitm > 0)
2018          {          {
2019          register int i;          register int i;
2020          clock_t time_taken;          clock_t time_taken;
# Line 1596  while (!done) Line 2024  while (!done)
2024          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
2025            {            {
2026            int workspace[1000];            int workspace[1000];
2027            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
2028              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2029                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
2030                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1604  while (!done) Line 2032  while (!done)
2032          else          else
2033  #endif  #endif
2034    
2035          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2036            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2037              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2038    
2039          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2040          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2041            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2042              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2043          }          }
2044    
2045        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2046        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2047          for the recursion limit. */
2048    
2049        if (find_match_limit)        if (find_match_limit)
2050          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2051          if (extra == NULL)          if (extra == NULL)
2052            {            {
2053            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2054            extra->flags = 0;            extra->flags = 0;
2055            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
2056    
2057          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
2058            {            options|g_notempty, use_offsets, use_size_offsets,
2059            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2060            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
2061              options | g_notempty, use_offsets, use_size_offsets);  
2062            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
2063              {            options|g_notempty, use_offsets, use_size_offsets,
2064              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2065              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
2066          }          }
2067    
2068        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1708  while (!done) Line 2114  while (!done)
2114    
2115        if (count >= 0)        if (count >= 0)
2116          {          {
2117          int i;          int i, maxcount;
2118    
2119    #if !defined NODFA
2120            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2121    #endif
2122              maxcount = use_size_offsets/3;
2123    
2124            /* This is a check against a lunatic return value. */
2125    
2126            if (count > maxcount)
2127              {
2128              fprintf(outfile,
2129                "** PCRE error: returned count %d is too big for offset size %d\n",
2130                count, use_size_offsets);
2131              count = use_size_offsets/3;
2132              if (do_g || do_G)
2133                {
2134                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2135                do_g = do_G = FALSE;        /* Break g/G loop */
2136                }
2137              }
2138    
2139          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2140            {            {
2141            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1736  while (!done) Line 2163  while (!done)
2163            {            {
2164            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2165              {              {
2166              char copybuffer[16];              char copybuffer[256];
2167              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2168                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2169              if (rc < 0)              if (rc < 0)
# Line 1746  while (!done) Line 2173  while (!done)
2173              }              }
2174            }            }
2175    
2176            for (copynamesptr = copynames;
2177                 *copynamesptr != 0;
2178                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2179              {
2180              char copybuffer[256];
2181              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2182                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2183              if (rc < 0)
2184                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2185              else
2186                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2187              }
2188    
2189          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2190            {            {
2191            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1758  while (!done) Line 2198  while (!done)
2198              else              else
2199                {                {
2200                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2201                pcre_free_substring(substring);                pcre_free_substring(substring);
2202                }                }
2203              }              }
2204            }            }
2205    
2206            for (getnamesptr = getnames;
2207                 *getnamesptr != 0;
2208                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2209              {
2210              const char *substring;
2211              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2212                count, (char *)getnamesptr, &substring);
2213              if (rc < 0)
2214                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2215              else
2216                {
2217                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2218                pcre_free_substring(substring);
2219                }
2220              }
2221    
2222          if (getlist)          if (getlist)
2223            {            {
2224            const char **stringlist;            const char **stringlist;
# Line 1798  while (!done) Line 2253  while (!done)
2253          }          }
2254    
2255        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2256        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2257        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2258        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2259        offset values to achieve this. We won't be at the end of the string -  
2260        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2261          "anycrlf". If the previous match was at the end of a line terminated by
2262          CRLF, an advance of one character just passes the \r, whereas we should
2263          prefer the longer newline sequence, as does the code in pcre_exec().
2264          Fudge the offset value to achieve this.
2265    
2266          Otherwise, in the case of UTF-8 matching, the advance must be one
2267          character, not one byte. */
2268    
2269        else        else
2270          {          {
2271          if (g_notempty != 0)          if (g_notempty != 0)
2272            {            {
2273            int onechar = 1;            int onechar = 1;
2274              unsigned int obits = ((real_pcre *)re)->options;
2275            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2276            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2277                {
2278                int d;
2279                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2280                obits = (d == '\r')? PCRE_NEWLINE_CR :
2281                        (d == '\n')? PCRE_NEWLINE_LF :
2282                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2283                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2284                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2285                }
2286              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2287                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2288                  &&
2289                  start_offset < len - 1 &&
2290                  bptr[start_offset] == '\r' &&
2291                  bptr[start_offset+1] == '\n')
2292                onechar++;
2293              else if (use_utf8)
2294              {              {
2295              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2296                {                {
# Line 1845  while (!done) Line 2325  while (!done)
2325        character. */        character. */
2326    
2327        g_notempty = 0;        g_notempty = 0;
2328    
2329        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2330          {          {
2331          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1863  while (!done) Line 2344  while (!done)
2344          len -= use_offsets[1];          len -= use_offsets[1];
2345          }          }
2346        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2347    
2348        NEXT_DATA: continue;
2349      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2350    
2351    CONTINUE:    CONTINUE:
# Line 1877  while (!done) Line 2360  while (!done)
2360      {      {
2361      new_free((void *)tables);      new_free((void *)tables);
2362      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2363        locale_set = 0;
2364      }      }
2365    }    }
2366    

Legend:
Removed from v.85  
changed lines
  Added in v.230

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12