/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 79 by nigel, Sat Feb 24 21:40:52 2007 UTC revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC
# Line 36  POSSIBILITY OF SUCH DAMAGE. Line 36  POSSIBILITY OF SUCH DAMAGE.
36  */  */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
45  #include <string.h>  #include <string.h>
# Line 44  POSSIBILITY OF SUCH DAMAGE. Line 48  POSSIBILITY OF SUCH DAMAGE.
48  #include <locale.h>  #include <locale.h>
49  #include <errno.h>  #include <errno.h>
50    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
51    
52  /* We need the internal info for displaying the results of pcre_study() and  /* A number of things vary for Windows builds. Originally, pcretest opened its
53  other internal data; pcretest also uses some of the fixed tables, and generally  input and output without "b"; then I was told that "b" was needed in some
54  has "inside information" compared to a program that strictly follows the PCRE  environments, so it was added for release 5.0 to both the input and output. (It
55  API. */  makes no difference on Unix-like systems.) Later I was told that it is wrong
56    for the input on Windows. I've now abstracted the modes into two macros that
57    are set here, to make it easier to fiddle with them, and removed "b" from the
58    input mode under Windows. */
59    
60    #if defined(_WIN32) || defined(WIN32)
61    #include <io.h>                /* For _setmode() */
62    #include <fcntl.h>             /* For _O_BINARY */
63    #define INPUT_MODE   "r"
64    #define OUTPUT_MODE  "wb"
65    
66    #else
67    #include <sys/time.h>          /* These two includes are needed */
68    #include <sys/resource.h>      /* for setrlimit(). */
69    #define INPUT_MODE   "rb"
70    #define OUTPUT_MODE  "wb"
71    #endif
72    
73    
74    /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76    internal macros, structures, and other internal data values; pcretest has
77    "inside information" compared to a program that strictly follows the PCRE API.
78    
79    Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84  #include "pcre_internal.h"  #include "pcre_internal.h"
85    
86    /* We need access to the data tables that PCRE uses. So as not to have to keep
87    two copies, we include the source file here, changing the names of the external
88    symbols to prevent clashes. */
89    
90    #define _pcre_utf8_table1      utf8_table1
91    #define _pcre_utf8_table1_size utf8_table1_size
92    #define _pcre_utf8_table2      utf8_table2
93    #define _pcre_utf8_table3      utf8_table3
94    #define _pcre_utf8_table4      utf8_table4
95    #define _pcre_utt              utt
96    #define _pcre_utt_size         utt_size
97    #define _pcre_OP_lengths       OP_lengths
98    
99    #include "pcre_tables.c"
100    
101    /* We also need the pcre_printint() function for printing out compiled
102    patterns. This function is in a separate file so that it can be included in
103    pcre_compile.c when that module is compiled with debugging enabled.
104    
105    The definition of the macro PRINTABLE, which determines whether to print an
106    output character as-is or as a hex value when showing compiled patterns, is
107    contained in this file. We use it here also, in cases when the locale has not
108    been explicitly changed, so as to get consistent output from systems that
109    differ in their output from isprint() even in the "C" locale. */
110    
111    #include "pcre_printint.src"
112    
113    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114    
115    
116  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
117  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 62  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124  /* It is also possible, for the benefit of the version imported into Exim, to  /* It is also possible, for the benefit of the version currently imported into
125  build pcretest without support for UTF8 (define NOUTF8), without the interface  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126  to the DFA matcher (NODFA), and without the doublecheck of the old "info"  interface to the DFA matcher (NODFA), and without the doublecheck of the old
127  function (define NOINFOCHECK). */  "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137    /* Other parameters */
138    
139  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
140  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 76  function (define NOINFOCHECK). */ Line 144  function (define NOINFOCHECK). */
144  #endif  #endif
145  #endif  #endif
146    
147  #define LOOPREPEAT 500000  /* This is the default loop count for timing. */
148    
149  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define PBUFFER_SIZE BUFFER_SIZE  
 #define DBUFFER_SIZE BUFFER_SIZE  
150    
151    /* Static variables */
152    
153  static FILE *outfile;  static FILE *outfile;
154  static int log_store = 0;  static int log_store = 0;
# Line 90  static int callout_extra; Line 157  static int callout_extra;
157  static int callout_fail_count;  static int callout_fail_count;
158  static int callout_fail_id;  static int callout_fail_id;
159  static int first_callout;  static int first_callout;
160    static int locale_set = 0;
161  static int show_malloc;  static int show_malloc;
162  static int use_utf8;  static int use_utf8;
163  static size_t gotten_store;  static size_t gotten_store;
164    
165    /* The buffers grow automatically if very long input lines are encountered. */
166    
167    static int buffer_size = 50000;
168    static uschar *buffer = NULL;
169    static uschar *dbuffer = NULL;
170  static uschar *pbuffer = NULL;  static uschar *pbuffer = NULL;
171    
172    
173    
174  /*************************************************  /*************************************************
175    *        Read or extend an input line            *
176    *************************************************/
177    
178    /* Input lines are read into buffer, but both patterns and data lines can be
179    continued over multiple input lines. In addition, if the buffer fills up, we
180    want to automatically expand it so as to be able to handle extremely large
181    lines that are needed for certain stress tests. When the input buffer is
182    expanded, the other two buffers must also be expanded likewise, and the
183    contents of pbuffer, which are a copy of the input for callouts, must be
184    preserved (for when expansion happens for a data line). This is not the most
185    optimal way of handling this, but hey, this is just a test program!
186    
187    Arguments:
188      f            the file to read
189      start        where in buffer to start (this *must* be within buffer)
190    
191    Returns:       pointer to the start of new data
192                   could be a copy of start, or could be moved
193                   NULL if no data read and EOF reached
194    */
195    
196    static uschar *
197    extend_inputline(FILE *f, uschar *start)
198    {
199    uschar *here = start;
200    
201    for (;;)
202      {
203      int rlen = buffer_size - (here - buffer);
204    
205      if (rlen > 1000)
206        {
207        int dlen;
208        if (fgets((char *)here, rlen,  f) == NULL)
209          return (here == start)? NULL : start;
210        dlen = (int)strlen((char *)here);
211        if (dlen > 0 && here[dlen - 1] == '\n') return start;
212        here += dlen;
213        }
214    
215      else
216        {
217        int new_buffer_size = 2*buffer_size;
218        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
219        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
220        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
221    
222        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
223          {
224          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
225          exit(1);
226          }
227    
228        memcpy(new_buffer, buffer, buffer_size);
229        memcpy(new_pbuffer, pbuffer, buffer_size);
230    
231        buffer_size = new_buffer_size;
232    
233        start = new_buffer + (start - buffer);
234        here = new_buffer + (here - buffer);
235    
236        free(buffer);
237        free(dbuffer);
238        free(pbuffer);
239    
240        buffer = new_buffer;
241        dbuffer = new_dbuffer;
242        pbuffer = new_pbuffer;
243        }
244      }
245    
246    return NULL;  /* Control never gets here */
247    }
248    
249    
250    
251    
252    
253    
254    
255    /*************************************************
256  *          Read number from string               *  *          Read number from string               *
257  *************************************************/  *************************************************/
258    
259  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
260  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
261  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
262    
263  Arguments:  Arguments:
264    str           string to be converted    str           string to be converted
# Line 134  return(result); Line 288  return(result);
288  and returns the value of the character.  and returns the value of the character.
289    
290  Argument:  Argument:
291    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
292    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
293    
294  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
295             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
296  */  */
297    
298  #if !defined NOUTF8  #if !defined NOUTF8
299    
300  static int  static int
301  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
302  {  {
303  int c = *buffer++;  int c = *utf8bytes++;
304  int d = c;  int d = c;
305  int i, j, s;  int i, j, s;
306    
# Line 162  if (i == 0 || i == 6) return 0; / Line 316  if (i == 0 || i == 6) return 0; /
316  /* i now has a value in the range 1-5 */  /* i now has a value in the range 1-5 */
317    
318  s = 6*i;  s = 6*i;
319  d = (c & _pcre_utf8_table3[i]) << s;  d = (c & utf8_table3[i]) << s;
320    
321  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
322    {    {
323    c = *buffer++;    c = *utf8bytes++;
324    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
325    s -= 6;    s -= 6;
326    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 174  for (j = 0; j < i; j++) Line 328  for (j = 0; j < i; j++)
328    
329  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
330    
331  for (j = 0; j < _pcre_utf8_table1_size; j++)  for (j = 0; j < utf8_table1_size; j++)
332    if (d <= _pcre_utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
333  if (j != i) return -(i+1);  if (j != i) return -(i+1);
334    
335  /* Valid value */  /* Valid value */
# Line 189  return i+1; Line 343  return i+1;
343    
344    
345  /*************************************************  /*************************************************
346    *       Convert character value to UTF-8         *
347    *************************************************/
348    
349    /* This function takes an integer value in the range 0 - 0x7fffffff
350    and encodes it as a UTF-8 character in 1 to 6 bytes.
351    
352    Arguments:
353      cvalue     the character value
354      utf8bytes  pointer to buffer for result - at least 6 bytes long
355    
356    Returns:     number of characters placed in the buffer
357    */
358    
359    #if !defined NOUTF8
360    
361    static int
362    ord2utf8(int cvalue, uschar *utf8bytes)
363    {
364    register int i, j;
365    for (i = 0; i < utf8_table1_size; i++)
366      if (cvalue <= utf8_table1[i]) break;
367    utf8bytes += i;
368    for (j = i; j > 0; j--)
369     {
370     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
371     cvalue >>= 6;
372     }
373    *utf8bytes = utf8_table2[i] | cvalue;
374    return i + 1;
375    }
376    
377    #endif
378    
379    
380    
381    /*************************************************
382  *             Print character string             *  *             Print character string             *
383  *************************************************/  *************************************************/
384    
# Line 198  chars without printing. */ Line 388  chars without printing. */
388    
389  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
390  {  {
391  int c;  int c = 0;
392  int yield = 0;  int yield = 0;
393    
394  while (length-- > 0)  while (length-- > 0)
# Line 212  while (length-- > 0) Line 402  while (length-- > 0)
402        {        {
403        length -= rc - 1;        length -= rc - 1;
404        p += rc;        p += rc;
405        if (c < 256 && isprint(c))        if (PRINTHEX(c))
406          {          {
407          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
408          yield++;          yield++;
409          }          }
410        else        else
411          {          {
412          int n;          int n = 4;
413          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
414          yield += n;          yield += (n <= 0x000000ff)? 2 :
415                     (n <= 0x00000fff)? 3 :
416                     (n <= 0x0000ffff)? 4 :
417                     (n <= 0x000fffff)? 5 : 6;
418          }          }
419        continue;        continue;
420        }        }
# Line 230  while (length-- > 0) Line 423  while (length-- > 0)
423    
424     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
425    
426    if (isprint(c = *(p++)))    c = *p++;
427      if (PRINTHEX(c))
428      {      {
429      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
430      yield++;      yield++;
# Line 404  if ((rc = pcre_fullinfo(re, study, optio Line 598  if ((rc = pcre_fullinfo(re, study, optio
598  *         Byte flipping function                 *  *         Byte flipping function                 *
599  *************************************************/  *************************************************/
600    
601  static long int  static unsigned long int
602  byteflip(long int value, int n)  byteflip(unsigned long int value, int n)
603  {  {
604  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);  if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
605  return ((value & 0x000000ff) << 24) |  return ((value & 0x000000ff) << 24) |
# Line 418  return ((value & 0x000000ff) << 24) | Line 612  return ((value & 0x000000ff) << 24) |
612    
613    
614  /*************************************************  /*************************************************
615    *        Check match or recursion limit          *
616    *************************************************/
617    
618    static int
619    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
620      int start_offset, int options, int *use_offsets, int use_size_offsets,
621      int flag, unsigned long int *limit, int errnumber, const char *msg)
622    {
623    int count;
624    int min = 0;
625    int mid = 64;
626    int max = -1;
627    
628    extra->flags |= flag;
629    
630    for (;;)
631      {
632      *limit = mid;
633    
634      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
635        use_offsets, use_size_offsets);
636    
637      if (count == errnumber)
638        {
639        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
640        min = mid;
641        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
642        }
643    
644      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
645                             count == PCRE_ERROR_PARTIAL)
646        {
647        if (mid == min + 1)
648          {
649          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
650          break;
651          }
652        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
653        max = mid;
654        mid = (min + mid)/2;
655        }
656      else break;    /* Some other error */
657      }
658    
659    extra->flags &= ~flag;
660    return count;
661    }
662    
663    
664    
665    /*************************************************
666    *         Check newline indicator                *
667    *************************************************/
668    
669    /* This is used both at compile and run-time to check for <xxx> escapes, where
670    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
671    no match.
672    
673    Arguments:
674      p           points after the leading '<'
675      f           file for error message
676    
677    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
678    */
679    
680    static int
681    check_newline(uschar *p, FILE *f)
682    {
683    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
684    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
685    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
686    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
687    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
688    fprintf(f, "Unknown newline type at: <%s\n", p);
689    return 0;
690    }
691    
692    
693    
694    /*************************************************
695    *             Usage function                     *
696    *************************************************/
697    
/* Write the command-line summary to stdout. The -dfa and -p lines appear only
when the corresponding support is compiled in. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
723    
724    
725    
726    /*************************************************
727  *                Main Program                    *  *                Main Program                    *
728  *************************************************/  *************************************************/
729    
# Line 432  int options = 0; Line 738  int options = 0;
738  int study_options = 0;  int study_options = 0;
739  int op = 1;  int op = 1;
740  int timeit = 0;  int timeit = 0;
741    int timeitm = 0;
742  int showinfo = 0;  int showinfo = 0;
743  int showstore = 0;  int showstore = 0;
744    int quiet = 0;
745  int size_offsets = 45;  int size_offsets = 45;
746  int size_offsets_max;  int size_offsets_max;
747  int *offsets = NULL;  int *offsets = NULL;
# Line 444  int debug = 0; Line 752  int debug = 0;
752  int done = 0;  int done = 0;
753  int all_use_dfa = 0;  int all_use_dfa = 0;
754  int yield = 0;  int yield = 0;
755    int stack_size;
756    
757    /* These vectors store, end-to-end, a list of captured substring names. Assume
758    that 1024 is plenty long enough for the few names we'll be testing. */
759    
760  unsigned char *buffer;  uschar copynames[1024];
761  unsigned char *dbuffer;  uschar getnames[1024];
762    
763    uschar *copynamesptr;
764    uschar *getnamesptr;
765    
766  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
767  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
768    
769  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
770  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
771  pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);  pbuffer = (unsigned char *)malloc(buffer_size);
   
 /* The outfile variable is static so that new_malloc can use it. The _setmode()  
 stuff is some magic that I don't understand, but which apparently does good  
 things in Windows. It's related to line terminations.  */  
772    
773  #if defined(_WIN32) || defined(WIN32)  /* The outfile variable is static so that new_malloc can use it. */
 _setmode( _fileno( stdout ), 0x8000 );  
 #endif  /* defined(_WIN32) || defined(WIN32) */  
774    
775  outfile = stdout;  outfile = stdout;
776    
777    /* The following  _setmode() stuff is some Windows magic that puts stdout into
778    binary mode, so the runtime library writes LF as-is instead of CRLF. At least,
779    that's what I've been told: never having used Windows I take this all on trust.
780    Originally it set 0x8000, but then I was advised that _O_BINARY was better. */
781    
782    #if defined(_WIN32) || defined(WIN32)
783    _setmode( _fileno( stdout ), _O_BINARY );
784    #endif
785    
786  /* Scan options */  /* Scan options */
787    
788  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 473  while (argc > 1 && argv[op][0] == '-') Line 791  while (argc > 1 && argv[op][0] == '-')
791    
792    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
793      showstore = 1;      showstore = 1;
794    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
795      else if (strcmp(argv[op], "-b") == 0) debug = 1;
796    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
797    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
798  #if !defined NODFA  #if !defined NODFA
# Line 486  while (argc > 1 && argv[op][0] == '-') Line 805  while (argc > 1 && argv[op][0] == '-')
805      op++;      op++;
806      argc--;      argc--;
807      }      }
808      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
809        {
810        int both = argv[op][2] == 0;
811        int temp;
812        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
813                         *endptr == 0))
814          {
815          timeitm = temp;
816          op++;
817          argc--;
818          }
819        else timeitm = LOOPREPEAT;
820        if (both) timeit = timeitm;
821        }
822      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
823          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
824            *endptr == 0))
825        {
826    #if defined(_WIN32) || defined(WIN32)
827        printf("PCRE: -S not supported on this OS\n");
828        exit(1);
829    #else
830        int rc;
831        struct rlimit rlim;
832        getrlimit(RLIMIT_STACK, &rlim);
833        rlim.rlim_cur = stack_size * 1024 * 1024;
834        rc = setrlimit(RLIMIT_STACK, &rlim);
835        if (rc != 0)
836          {
837        printf("PCRE: setrlimit() failed with error %d\n", rc);
838        exit(1);
839          }
840        op++;
841        argc--;
842    #endif
843        }
844  #if !defined NOPOSIX  #if !defined NOPOSIX
845    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
846  #endif  #endif
# Line 499  while (argc > 1 && argv[op][0] == '-') Line 854  while (argc > 1 && argv[op][0] == '-')
854      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
855      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
856      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
857      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
858          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
859          (rc == -2)? "ANYCRLF" :
860          (rc == -1)? "ANY" : "???");
861      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
862      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
863      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
864      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
865      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
866      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
867        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
868        printf("  Default recursion depth limit = %d\n", rc);
869      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
870      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
871      exit(0);      goto EXIT;
872        }
873      else if (strcmp(argv[op], "-help") == 0 ||
874               strcmp(argv[op], "--help") == 0)
875        {
876        usage();
877        goto EXIT;
878      }      }
879    else    else
880      {      {
881      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
882      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
     printf("  -C     show PCRE compile-time options and exit\n");  
     printf("  -d     debug: show compiled code; implies -i\n");  
 #if !defined NODFA  
     printf("  -dfa   force DFA matching for all subjects\n");  
 #endif  
     printf("  -i     show information about compiled pattern\n"  
            "  -m     output memory used information\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store (memory) used information\n"  
            "  -t     time compilation and execution\n");  
883      yield = 1;      yield = 1;
884      goto EXIT;      goto EXIT;
885      }      }
# Line 541  offsets = (int *)malloc(size_offsets_max Line 894  offsets = (int *)malloc(size_offsets_max
894  if (offsets == NULL)  if (offsets == NULL)
895    {    {
896    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
897      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
898    yield = 1;    yield = 1;
899    goto EXIT;    goto EXIT;
900    }    }
# Line 550  if (offsets == NULL) Line 903  if (offsets == NULL)
903    
904  if (argc > 1)  if (argc > 1)
905    {    {
906    infile = fopen(argv[op], "rb");    infile = fopen(argv[op], INPUT_MODE);
907    if (infile == NULL)    if (infile == NULL)
908      {      {
909      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 561  if (argc > 1) Line 914  if (argc > 1)
914    
915  if (argc > 2)  if (argc > 2)
916    {    {
917    outfile = fopen(argv[op+1], "wb");    outfile = fopen(argv[op+1], OUTPUT_MODE);
918    if (outfile == NULL)    if (outfile == NULL)
919      {      {
920      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 577  pcre_free = new_free; Line 930  pcre_free = new_free;
930  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
931  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
932    
933  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
934    
935  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
936    
937  /* Main loop */  /* Main loop */
938    
# Line 601  while (!done) Line 954  while (!done)
954    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
955    int do_study = 0;    int do_study = 0;
956    int do_debug = debug;    int do_debug = debug;
957      int debug_lengths = 1;
958    int do_G = 0;    int do_G = 0;
959    int do_g = 0;    int do_g = 0;
960    int do_showinfo = showinfo;    int do_showinfo = showinfo;
961    int do_showrest = 0;    int do_showrest = 0;
962    int do_flip = 0;    int do_flip = 0;
963    int erroroffset, len, delimiter;    int erroroffset, len, delimiter, poffset;
964    
965    use_utf8 = 0;    use_utf8 = 0;
966    
967    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
968    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
969    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
970    fflush(outfile);    fflush(outfile);
971    
# Line 623  while (!done) Line 977  while (!done)
977    
978    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)    if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
979      {      {
980      unsigned long int magic;      unsigned long int magic, get_options;
981      uschar sbuf[8];      uschar sbuf[8];
982      FILE *f;      FILE *f;
983    
# Line 671  while (!done) Line 1025  while (!done)
1025    
1026      /* Need to know if UTF-8 for printing data strings */      /* Need to know if UTF-8 for printing data strings */
1027    
1028      new_info(re, NULL, PCRE_INFO_OPTIONS, &options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1029      use_utf8 = (options & PCRE_UTF8) != 0;      use_utf8 = (get_options & PCRE_UTF8) != 0;
1030    
1031      /* Now see if there is any following study data */      /* Now see if there is any following study data */
1032    
# Line 716  while (!done) Line 1070  while (!done)
1070      }      }
1071    
1072    pp = p;    pp = p;
1073      poffset = p - buffer;
1074    
1075    for(;;)    for(;;)
1076      {      {
# Line 726  while (!done) Line 1081  while (!done)
1081        pp++;        pp++;
1082        }        }
1083      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1084      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1085      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1086        {        {
1087        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1088        done = 1;        done = 1;
# Line 744  while (!done) Line 1091  while (!done)
1091      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1092      }      }
1093    
1094      /* The buffer may have moved while being extended; reset the start of data
1095      pointer to the correct relative point in the buffer. */
1096    
1097      p = buffer + poffset;
1098    
1099    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1100    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1101    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
# Line 775  while (!done) Line 1127  while (!done)
1127    
1128        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1129        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1130          case 'B': do_debug = 1; break;
1131        case 'C': options |= PCRE_AUTO_CALLOUT; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1132        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1133        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1134        case 'F': do_flip = 1; break;        case 'F': do_flip = 1; break;
1135        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1136        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1137          case 'J': options |= PCRE_DUPNAMES; break;
1138        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1139        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1140    
# Line 791  while (!done) Line 1145  while (!done)
1145        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1146        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1147        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1148          case 'Z': debug_lengths = 0; break;
1149        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1150        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1151    
1152        case 'L':        case 'L':
1153        ppp = pp;        ppp = pp;
1154        /* The '\r' test here is so that it works on Windows */        /* The '\r' test here is so that it works on Windows. */
1155        while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;        /* The 0 (NUL) test is just in case this is an unterminated line. */
1156          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1157        *ppp = 0;        *ppp = 0;
1158        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1159          {          {
1160          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1161          goto SKIP_DATA;          goto SKIP_DATA;
1162          }          }
1163          locale_set = 1;
1164        tables = pcre_maketables();        tables = pcre_maketables();
1165        pp = ppp;        pp = ppp;
1166        break;        break;
# Line 815  while (!done) Line 1172  while (!done)
1172        *pp = 0;        *pp = 0;
1173        break;        break;
1174    
1175          case '<':
1176            {
1177            int x = check_newline(pp, outfile);
1178            if (x == 0) goto SKIP_DATA;
1179            options |= x;
1180            while (*pp++ != '>');
1181            }
1182          break;
1183    
1184        case '\r':                      /* So that it works in Windows */        case '\r':                      /* So that it works in Windows */
1185        case '\n':        case '\n':
1186        case ' ':        case ' ':
# Line 839  while (!done) Line 1205  while (!done)
1205      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1206      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1207      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;      if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1208        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1209        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1210    
1211      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1212    
1213      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 846  while (!done) Line 1215  while (!done)
1215    
1216      if (rc != 0)      if (rc != 0)
1217        {        {
1218        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1219        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1220        goto SKIP_DATA;        goto SKIP_DATA;
1221        }        }
# Line 858  while (!done) Line 1227  while (!done)
1227  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1228    
1229      {      {
1230      if (timeit)      if (timeit > 0)
1231        {        {
1232        register int i;        register int i;
1233        clock_t time_taken;        clock_t time_taken;
1234        clock_t start_time = clock();        clock_t start_time = clock();
1235        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1236          {          {
1237          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1238          if (re != NULL) free(re);          if (re != NULL) free(re);
1239          }          }
1240        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1241        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1242          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1243            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1244        }        }
1245    
# Line 887  while (!done) Line 1256  while (!done)
1256          {          {
1257          for (;;)          for (;;)
1258            {            {
1259            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1260              {              {
1261              done = 1;              done = 1;
1262              goto CONTINUE;              goto CONTINUE;
# Line 922  while (!done) Line 1291  while (!done)
1291    
1292      if (do_study)      if (do_study)
1293        {        {
1294        if (timeit)        if (timeit > 0)
1295          {          {
1296          register int i;          register int i;
1297          clock_t time_taken;          clock_t time_taken;
1298          clock_t start_time = clock();          clock_t start_time = clock();
1299          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeit; i++)
1300            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
1301          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1302          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
1303          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
1304            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeit) /
1305              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1306          }          }
1307        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
# Line 975  while (!done) Line 1344  while (!done)
1344    
1345      SHOW_INFO:      SHOW_INFO:
1346    
1347        if (do_debug)
1348          {
1349          fprintf(outfile, "------------------------------------------------------------------\n");
1350          pcre_printint(re, outfile, debug_lengths);
1351          }
1352    
1353      if (do_showinfo)      if (do_showinfo)
1354        {        {
1355        unsigned long int get_options, all_options;        unsigned long int get_options, all_options;
1356  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1357        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1358  #endif  #endif
1359        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char, okpartial, jchanged;
1360        int nameentrysize, namecount;        int nameentrysize, namecount;
1361        const uschar *nametable;        const uschar *nametable;
1362    
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         _pcre_printint(re, outfile);  
         }  
   
1363        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1364        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1365        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
# Line 1000  while (!done) Line 1369  while (!done)
1369        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1370        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1371        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1372          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1373          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1374    
1375  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
1376        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
# Line 1041  while (!done) Line 1412  while (!done)
1412            }            }
1413          }          }
1414    
1415        /* The NOPARTIAL bit is a private bit in the options, so we have        if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
      to fish it out via our back door */  
1416    
1417        all_options = ((real_pcre *)re)->options;        all_options = ((real_pcre *)re)->options;
1418        if (do_flip)        if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
         {  
         all_options = byteflip(all_options, sizeof(all_options));  
         }  
   
       if ((all_options & PCRE_NOPARTIAL) != 0)  
         fprintf(outfile, "Partial matching not supported\n");  
1419    
1420        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1421          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1422            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1423            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1424            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
# Line 1064  while (!done) Line 1428  while (!done)
1428            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1429            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1430            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1431              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1432            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1433            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1434              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1435    
1436          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1437    
1438          switch (get_options & PCRE_NEWLINE_BITS)
1439            {
1440            case PCRE_NEWLINE_CR:
1441            fprintf(outfile, "Forced newline sequence: CR\n");
1442            break;
1443    
1444        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_LF:
1445          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: LF\n");
1446            break;
1447    
1448            case PCRE_NEWLINE_CRLF:
1449            fprintf(outfile, "Forced newline sequence: CRLF\n");
1450            break;
1451    
1452            case PCRE_NEWLINE_ANYCRLF:
1453            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1454            break;
1455    
1456            case PCRE_NEWLINE_ANY:
1457            fprintf(outfile, "Forced newline sequence: ANY\n");
1458            break;
1459    
1460            default:
1461            break;
1462            }
1463    
1464        if (first_char == -1)        if (first_char == -1)
1465          {          {
1466          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1467          }          }
1468        else if (first_char < 0)        else if (first_char < 0)
1469          {          {
# Line 1083  while (!done) Line 1474  while (!done)
1474          int ch = first_char & 255;          int ch = first_char & 255;
1475          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1476            "" : " (caseless)";            "" : " (caseless)";
1477          if (isprint(ch))          if (PRINTHEX(ch))
1478            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1479          else          else
1480            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 1098  while (!done) Line 1489  while (!done)
1489          int ch = need_char & 255;          int ch = need_char & 255;
1490          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1491            "" : " (caseless)";            "" : " (caseless)";
1492          if (isprint(ch))          if (PRINTHEX(ch))
1493            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1494          else          else
1495            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
# Line 1134  while (!done) Line 1525  while (!done)
1525                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
1526                    c = 2;                    c = 2;
1527                    }                    }
1528                  if (isprint(i) && i != ' ')                  if (PRINTHEX(i) && i != ' ')
1529                    {                    {
1530                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
1531                    c += 2;                    c += 2;
# Line 1193  while (!done) Line 1584  while (!done)
1584                  strerror(errno));                  strerror(errno));
1585                }                }
1586              else fprintf(outfile, "Study data written to %s\n", to_file);              else fprintf(outfile, "Study data written to %s\n", to_file);
1587    
1588              }              }
1589            }            }
1590          fclose(f);          fclose(f);
# Line 1209  while (!done) Line 1601  while (!done)
1601    
1602    for (;;)    for (;;)
1603      {      {
1604      unsigned char *q;      uschar *q;
1605      unsigned char *bptr = dbuffer;      uschar *bptr;
1606      int *use_offsets = offsets;      int *use_offsets = offsets;
1607      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1608      int callout_data = 0;      int callout_data = 0;
# Line 1227  while (!done) Line 1619  while (!done)
1619    
1620      options = 0;      options = 0;
1621    
1622        *copynames = 0;
1623        *getnames = 0;
1624    
1625        copynamesptr = copynames;
1626        getnamesptr = getnames;
1627    
1628      pcre_callout = callout;      pcre_callout = callout;
1629      first_callout = 1;      first_callout = 1;
1630      callout_extra = 0;      callout_extra = 0;
# Line 1235  while (!done) Line 1633  while (!done)
1633      callout_fail_id = -1;      callout_fail_id = -1;
1634      show_malloc = 0;      show_malloc = 0;
1635    
1636      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1637      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1638    
1639        len = 0;
1640        for (;;)
1641        {        {
1642        done = 1;        if (infile == stdin) printf("data> ");
1643        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1644            {
1645            if (len > 0) break;
1646            done = 1;
1647            goto CONTINUE;
1648            }
1649          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1650          len = (int)strlen((char *)buffer);
1651          if (buffer[len-1] == '\n') break;
1652        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1653    
     len = (int)strlen((char *)buffer);  
1654      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1655      buffer[len] = 0;      buffer[len] = 0;
1656      if (len == 0) break;      if (len == 0) break;
# Line 1251  while (!done) Line 1658  while (!done)
1658      p = buffer;      p = buffer;
1659      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1660    
1661      q = dbuffer;      bptr = q = dbuffer;
1662      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1663        {        {
1664        int i = 0;        int i = 0;
# Line 1273  while (!done) Line 1680  while (!done)
1680          c -= '0';          c -= '0';
1681          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1682            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1683    
1684    #if !defined NOUTF8
1685            if (use_utf8 && c > 255)
1686              {
1687              unsigned char buff8[8];
1688              int ii, utn;
1689              utn = ord2utf8(c, buff8);
1690              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1691              c = buff8[ii];   /* Last byte */
1692              }
1693    #endif
1694          break;          break;
1695    
1696          case 'x':          case 'x':
# Line 1290  while (!done) Line 1708  while (!done)
1708              {              {
1709              unsigned char buff8[8];              unsigned char buff8[8];
1710              int ii, utn;              int ii, utn;
1711              utn = _pcre_ord2utf8(c, buff8);              utn = ord2utf8(c, buff8);
1712              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1713              c = buff8[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1714              p = pt + 1;              p = pt + 1;
# Line 1334  while (!done) Line 1752  while (!done)
1752            }            }
1753          else if (isalnum(*p))          else if (isalnum(*p))
1754            {            {
1755            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1756            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1757              *npp++ = 0;
1758            *npp = 0;            *npp = 0;
1759            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1760            if (n < 0)            if (n < 0)
1761              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1762            else copystrings |= 1 << n;            copynamesptr = npp;
1763            }            }
1764          else if (*p == '+')          else if (*p == '+')
1765            {            {
# Line 1402  while (!done) Line 1820  while (!done)
1820            }            }
1821          else if (isalnum(*p))          else if (isalnum(*p))
1822            {            {
1823            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1824            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1825              *npp++ = 0;
1826            *npp = 0;            *npp = 0;
1827            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1828            if (n < 0)            if (n < 0)
1829              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1830            else getstrings |= 1 << n;            getnamesptr = npp;
1831            }            }
1832          continue;          continue;
1833    
# Line 1435  while (!done) Line 1853  while (!done)
1853            if (offsets == NULL)            if (offsets == NULL)
1854              {              {
1855              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1856                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1857              yield = 1;              yield = 1;
1858              goto EXIT;              goto EXIT;
1859              }              }
# Line 1448  while (!done) Line 1866  while (!done)
1866          options |= PCRE_PARTIAL;          options |= PCRE_PARTIAL;
1867          continue;          continue;
1868    
1869            case 'Q':
1870            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871            if (extra == NULL)
1872              {
1873              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874              extra->flags = 0;
1875              }
1876            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1877            extra->match_limit_recursion = n;
1878            continue;
1879    
1880            case 'q':
1881            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1882            if (extra == NULL)
1883              {
1884              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1885              extra->flags = 0;
1886              }
1887            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1888            extra->match_limit = n;
1889            continue;
1890    
1891  #if !defined NODFA  #if !defined NODFA
1892          case 'R':          case 'R':
1893          options |= PCRE_DFA_RESTART;          options |= PCRE_DFA_RESTART;
# Line 1465  while (!done) Line 1905  while (!done)
1905          case '?':          case '?':
1906          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
1907          continue;          continue;
1908    
1909            case '<':
1910              {
1911              int x = check_newline(p, outfile);
1912              if (x == 0) goto NEXT_DATA;
1913              options |= x;
1914              while (*p++ != '>');
1915              }
1916            continue;
1917          }          }
1918        *q++ = c;        *q++ = c;
1919        }        }
# Line 1495  while (!done) Line 1944  while (!done)
1944    
1945        if (rc != 0)        if (rc != 0)
1946          {          {
1947          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1948          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1949          }          }
1950          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1951                  != 0)
1952            {
1953            fprintf(outfile, "Matched with REG_NOSUB\n");
1954            }
1955        else        else
1956          {          {
1957          size_t i;          size_t i;
# Line 1529  while (!done) Line 1983  while (!done)
1983    
1984      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1985        {        {
1986        if (timeit)        if (timeitm > 0)
1987          {          {
1988          register int i;          register int i;
1989          clock_t time_taken;          clock_t time_taken;
# Line 1539  while (!done) Line 1993  while (!done)
1993          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
1994            {            {
1995            int workspace[1000];            int workspace[1000];
1996            for (i = 0; i < LOOPREPEAT; i++)            for (i = 0; i < timeitm; i++)
1997              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,              count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1998                options | g_notempty, use_offsets, use_size_offsets, workspace,                options | g_notempty, use_offsets, use_size_offsets, workspace,
1999                sizeof(workspace)/sizeof(int));                sizeof(workspace)/sizeof(int));
# Line 1547  while (!done) Line 2001  while (!done)
2001          else          else
2002  #endif  #endif
2003    
2004          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < timeitm; i++)
2005            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2006              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2007    
2008          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2009          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2010            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2011              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2012          }          }
2013    
2014        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2015        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2016          for the recursion limit. */
2017    
2018        if (find_match_limit)        if (find_match_limit)
2019          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2020          if (extra == NULL)          if (extra == NULL)
2021            {            {
2022            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2023            extra->flags = 0;            extra->flags = 0;
2024            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
2025    
2026          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
2027            {            options|g_notempty, use_offsets, use_size_offsets,
2028            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2029            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
2030              options | g_notempty, use_offsets, use_size_offsets);  
2031            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
2032              {            options|g_notempty, use_offsets, use_size_offsets,
2033              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2034              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||  
                                  count == PCRE_ERROR_PARTIAL)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
2035          }          }
2036    
2037        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1651  while (!done) Line 2083  while (!done)
2083    
2084        if (count >= 0)        if (count >= 0)
2085          {          {
2086          int i;          int i, maxcount;
2087    
2088    #if !defined NODFA
2089            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2090    #endif
2091              maxcount = use_size_offsets/3;
2092    
2093            /* This is a check against a lunatic return value. */
2094    
2095            if (count > maxcount)
2096              {
2097              fprintf(outfile,
2098                "** PCRE error: returned count %d is too big for offset size %d\n",
2099                count, use_size_offsets);
2100              count = use_size_offsets/3;
2101              if (do_g || do_G)
2102                {
2103                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2104                do_g = do_G = FALSE;        /* Break g/G loop */
2105                }
2106              }
2107    
2108          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2109            {            {
2110            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1679  while (!done) Line 2132  while (!done)
2132            {            {
2133            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2134              {              {
2135              char copybuffer[16];              char copybuffer[256];
2136              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2137                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2138              if (rc < 0)              if (rc < 0)
# Line 1689  while (!done) Line 2142  while (!done)
2142              }              }
2143            }            }
2144    
2145            for (copynamesptr = copynames;
2146                 *copynamesptr != 0;
2147                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2148              {
2149              char copybuffer[256];
2150              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2151                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2152              if (rc < 0)
2153                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2154              else
2155                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2156              }
2157    
2158          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2159            {            {
2160            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1701  while (!done) Line 2167  while (!done)
2167              else              else
2168                {                {
2169                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2170                pcre_free_substring(substring);                pcre_free_substring(substring);
2171                }                }
2172              }              }
2173            }            }
2174    
2175            for (getnamesptr = getnames;
2176                 *getnamesptr != 0;
2177                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2178              {
2179              const char *substring;
2180              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2181                count, (char *)getnamesptr, &substring);
2182              if (rc < 0)
2183                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2184              else
2185                {
2186                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2187                pcre_free_substring(substring);
2188                }
2189              }
2190    
2191          if (getlist)          if (getlist)
2192            {            {
2193            const char **stringlist;            const char **stringlist;
# Line 1741  while (!done) Line 2222  while (!done)
2222          }          }
2223    
2224        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2225        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2226        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2227        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2228        offset values to achieve this. We won't be at the end of the string -  
2229        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2230          "anycrlf". If the previous match was at the end of a line terminated by
2231          CRLF, an advance of one character just passes the \r, whereas we should
2232          prefer the longer newline sequence, as does the code in pcre_exec().
2233          Fudge the offset value to achieve this.
2234    
2235          Otherwise, in the case of UTF-8 matching, the advance must be one
2236          character, not one byte. */
2237    
2238        else        else
2239          {          {
2240          if (g_notempty != 0)          if (g_notempty != 0)
2241            {            {
2242            int onechar = 1;            int onechar = 1;
2243              unsigned int obits = ((real_pcre *)re)->options;
2244            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2245            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2246                {
2247                int d;
2248                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2249                obits = (d == '\r')? PCRE_NEWLINE_CR :
2250                        (d == '\n')? PCRE_NEWLINE_LF :
2251                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2252                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2253                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2254                }
2255              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2256                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2257                  &&
2258                  start_offset < len - 1 &&
2259                  bptr[start_offset] == '\r' &&
2260                  bptr[start_offset+1] == '\n')
2261                onechar++;
2262              else if (use_utf8)
2263              {              {
2264              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2265                {                {
# Line 1788  while (!done) Line 2294  while (!done)
2294        character. */        character. */
2295    
2296        g_notempty = 0;        g_notempty = 0;
2297    
2298        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2299          {          {
2300          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1806  while (!done) Line 2313  while (!done)
2313          len -= use_offsets[1];          len -= use_offsets[1];
2314          }          }
2315        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2316    
2317        NEXT_DATA: continue;
2318      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2319    
2320    CONTINUE:    CONTINUE:
# Line 1820  while (!done) Line 2329  while (!done)
2329      {      {
2330      new_free((void *)tables);      new_free((void *)tables);
2331      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2332        locale_set = 0;
2333      }      }
2334    }    }
2335    

Legend:
Removed from v.79  
changed lines
  Added in v.200

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12