/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC revision 96 by nigel, Fri Mar 2 13:10:43 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 42  been extended and consequently is now ra
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    
48    /* A number of things vary for Windows builds. Originally, pcretest opened its
49    input and output without "b"; then I was told that "b" was needed in some
50    environments, so it was added for release 5.0 to both the input and output. (It
51    makes no difference on Unix-like systems.) Later I was told that it is wrong
52    for the input on Windows. I've now abstracted the modes into two macros that
53    are set here, to make it easier to fiddle with them, and removed "b" from the
54    input mode under Windows. */
55    
56    #if defined(_WIN32) || defined(WIN32)
57    #include <io.h>                /* For _setmode() */
58    #include <fcntl.h>             /* For _O_BINARY */
59    #define INPUT_MODE   "r"
60    #define OUTPUT_MODE  "wb"
61    
62    #else
63    #include <sys/time.h>          /* These two includes are needed */
64    #include <sys/resource.h>      /* for setrlimit(). */
65    #define INPUT_MODE   "rb"
66    #define OUTPUT_MODE  "wb"
67    #endif
68    
 /* We need the internal info for displaying the results of pcre_study(). Also  
 for getting the opcodes for showing compiled code. */  
69    
70  #define PCRE_SPY        /* For Win32 build, import data, not export */  #define PCRE_SPY        /* For Win32 build, import data, not export */
71  #include "internal.h"  
72    /* We include pcre_internal.h because we need the internal info for displaying
73    the results of pcre_study() and we also need to know about the internal
74    macros, structures, and other internal data values; pcretest has "inside
75    information" compared to a program that strictly follows the PCRE API. */
76    
77    #include "pcre_internal.h"
78    
79    /* We need access to the data tables that PCRE uses. So as not to have to keep
80    two copies, we include the source file here, changing the names of the external
81    symbols to prevent clashes. */
82    
83    #define _pcre_utf8_table1      utf8_table1
84    #define _pcre_utf8_table1_size utf8_table1_size
85    #define _pcre_utf8_table2      utf8_table2
86    #define _pcre_utf8_table3      utf8_table3
87    #define _pcre_utf8_table4      utf8_table4
88    #define _pcre_utt              utt
89    #define _pcre_utt_size         utt_size
90    #define _pcre_OP_lengths       OP_lengths
91    
92    #include "pcre_tables.c"
93    
94    /* We also need the pcre_printint() function for printing out compiled
95    patterns. This function is in a separate file so that it can be included in
96    pcre_compile.c when that module is compiled with debugging enabled.
97    
98    The definition of the macro PRINTABLE, which determines whether to print an
99    output character as-is or as a hex value when showing compiled patterns, is
100    contained in this file. We use it here also, in cases when the locale has not
101    been explicitly changed, so as to get consistent output from systems that
102    differ in their output from isprint() even in the "C" locale. */
103    
104    #include "pcre_printint.src"
105    
106    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
107    
108    
109  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
110  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 114  Makefile. */
114  #include "pcreposix.h"  #include "pcreposix.h"
115  #endif  #endif
116    
117    /* It is also possible, for the benefit of the version imported into Exim, to
118    build pcretest without support for UTF8 (define NOUTF8), without the interface
119    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
120    function (define NOINFOCHECK). */
121    
122    
123    /* Other parameters */
124    
125  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
126  #ifdef CLK_TCK  #ifdef CLK_TCK
127  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 130  Makefile. */
130  #endif  #endif
131  #endif  #endif
132    
133  #define LOOPREPEAT 50000  /* This is the default loop count for timing. */
134    
135    #define LOOPREPEAT 500000
136    
137    /* Static variables */
138    
139  static FILE *outfile;  static FILE *outfile;
140  static int log_store = 0;  static int log_store = 0;
# Line 45  static int callout_extra; Line 143  static int callout_extra;
143  static int callout_fail_count;  static int callout_fail_count;
144  static int callout_fail_id;  static int callout_fail_id;
145  static int first_callout;  static int first_callout;
146  static int utf8;  static int locale_set = 0;
147    static int show_malloc;
148    static int use_utf8;
149  static size_t gotten_store;  static size_t gotten_store;
150    
151    /* The buffers grow automatically if very long input lines are encountered. */
152    
153    static int buffer_size = 50000;
154    static uschar *buffer = NULL;
155    static uschar *dbuffer = NULL;
156    static uschar *pbuffer = NULL;
157    
158    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
159    
160  static int utf8_table2[] = {  /*************************************************
161    0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  *        Read or extend an input line            *
162    *************************************************/
163    
164  static int utf8_table3[] = {  /* Input lines are read into buffer, but both patterns and data lines can be
165    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  continued over multiple input lines. In addition, if the buffer fills up, we
166    want to automatically expand it so as to be able to handle extremely large
167    lines that are needed for certain stress tests. When the input buffer is
168    expanded, the other two buffers must also be expanded likewise, and the
169    contents of pbuffer, which are a copy of the input for callouts, must be
170    preserved (for when expansion happens for a data line). This is not the most
171    optimal way of handling this, but hey, this is just a test program!
172    
173    Arguments:
174      f            the file to read
175      start        where in buffer to start (this *must* be within buffer)
176    
177    Returns:       pointer to the start of new data
178                   could be a copy of start, or could be moved
179                   NULL if no data read and EOF reached
180    */
181    
182    static uschar *
183    extend_inputline(FILE *f, uschar *start)
184    {
185    uschar *here = start;
186    
187    for (;;)
188      {
189      int rlen = buffer_size - (here - buffer);
190    
191      if (rlen > 1000)
192        {
193        int dlen;
194        if (fgets((char *)here, rlen,  f) == NULL)
195          return (here == start)? NULL : start;
196        dlen = (int)strlen((char *)here);
197        if (dlen > 0 && here[dlen - 1] == '\n') return start;
198        here += dlen;
199        }
200    
201      else
202        {
203        int new_buffer_size = 2*buffer_size;
204        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
205        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
206        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
207    
208        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
209          {
210          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
211          exit(1);
212          }
213    
214        memcpy(new_buffer, buffer, buffer_size);
215        memcpy(new_pbuffer, pbuffer, buffer_size);
216    
217        buffer_size = new_buffer_size;
218    
219        start = new_buffer + (start - buffer);
220        here = new_buffer + (here - buffer);
221    
222        free(buffer);
223        free(dbuffer);
224        free(pbuffer);
225    
226        buffer = new_buffer;
227        dbuffer = new_dbuffer;
228        pbuffer = new_pbuffer;
229        }
230      }
231    
232    return NULL;  /* Control never gets here */
233    }
234    
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
235    
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. */  
236    
 static uschar OP_lengths[] = { OP_LENGTHS };  
237    
 #include "printint.c"  
238    
239    
240    
# Line 82  static uschar OP_lengths[] = { OP_LENGTH Line 244  static uschar OP_lengths[] = { OP_LENGTH
244    
245  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
246  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
247  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
248    
249  Arguments:  Arguments:
250    str           string to be converted    str           string to be converted
# Line 103  return(result); Line 265  return(result);
265    
266    
267    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
268    
269  /*************************************************  /*************************************************
270  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 148  return i + 1; Line 274  return i + 1;
274  and returns the value of the character.  and returns the value of the character.
275    
276  Argument:  Argument:
277    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
278    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
279    
280  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
281             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
282  */  */
283    
284  int  #if !defined NOUTF8
285  utf82ord(unsigned char *buffer, int *vptr)  
286    static int
287    utf82ord(unsigned char *utf8bytes, int *vptr)
288  {  {
289  int c = *buffer++;  int c = *utf8bytes++;
290  int d = c;  int d = c;
291  int i, j, s;  int i, j, s;
292    
# Line 178  d = (c & utf8_table3[i]) << s; Line 306  d = (c & utf8_table3[i]) << s;
306    
307  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
308    {    {
309    c = *buffer++;    c = *utf8bytes++;
310    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
311    s -= 6;    s -= 6;
312    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 186  for (j = 0; j < i; j++) Line 314  for (j = 0; j < i; j++)
314    
315  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
316    
317  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
318    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
319  if (j != i) return -(i+1);  if (j != i) return -(i+1);
320    
# Line 196  if (j != i) return -(i+1); Line 324  if (j != i) return -(i+1);
324  return i+1;  return i+1;
325  }  }
326    
327    #endif
328    
329    
330    
331    /*************************************************
332    *       Convert character value to UTF-8         *
333    *************************************************/
334    
335    /* This function takes an integer value in the range 0 - 0x7fffffff
336    and encodes it as a UTF-8 character in 1 to 6 bytes.
337    
338    Arguments:
339      cvalue     the character value
340      utf8bytes  pointer to buffer for result - at least 6 bytes long
341    
342    Returns:     number of characters placed in the buffer
343    */
344    
345    #if !defined NOUTF8
346    
347    static int
348    ord2utf8(int cvalue, uschar *utf8bytes)
349    {
350    register int i, j;
351    for (i = 0; i < utf8_table1_size; i++)
352      if (cvalue <= utf8_table1[i]) break;
353    utf8bytes += i;
354    for (j = i; j > 0; j--)
355     {
356     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
357     cvalue >>= 6;
358     }
359    *utf8bytes = utf8_table2[i] | cvalue;
360    return i + 1;
361    }
362    
363    #endif
364    
365    
366    
367  /*************************************************  /*************************************************
# Line 208  chars without printing. */ Line 374  chars without printing. */
374    
375  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
376  {  {
377  int c;  int c = 0;
378  int yield = 0;  int yield = 0;
379    
380  while (length-- > 0)  while (length-- > 0)
381    {    {
382    if (utf8)  #if !defined NOUTF8
383      if (use_utf8)
384      {      {
385      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
386    
# Line 221  while (length-- > 0) Line 388  while (length-- > 0)
388        {        {
389        length -= rc - 1;        length -= rc - 1;
390        p += rc;        p += rc;
391        if (c < 256 && isprint(c))        if (PRINTHEX(c))
392          {          {
393          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
394          yield++;          yield++;
395          }          }
396        else        else
397          {          {
398          int n;          int n = 4;
399          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
400          yield += n;          yield += (n <= 0x000000ff)? 2 :
401                     (n <= 0x00000fff)? 3 :
402                     (n <= 0x0000ffff)? 4 :
403                     (n <= 0x000fffff)? 5 : 6;
404          }          }
405        continue;        continue;
406        }        }
407      }      }
408    #endif
409    
410     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
411    
412    if (isprint(c = *(p++)))    c = *p++;
413      if (PRINTHEX(c))
414      {      {
415      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
416      yield++;      yield++;
# Line 266  data is not zero. */ Line 438  data is not zero. */
438  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
439  {  {
440  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
441  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
442    
443  if (callout_extra)  if (callout_extra)
444    {    {
   int i;  
445    fprintf(f, "Callout %d: last capture = %d\n",    fprintf(f, "Callout %d: last capture = %d\n",
446      cb->callout_number, cb->capture_last);      cb->callout_number, cb->capture_last);
447    
# Line 298  pre_start = pchars((unsigned char *)cb-> Line 469  pre_start = pchars((unsigned char *)cb->
469  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
470    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
471    
472    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
473    
474  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
475    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
476    
477  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
478    
479  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
480  shown */  shown. For automatic callouts, show the pattern offset. */
481    
482  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
483    else fprintf(outfile, "%3d ", cb->callout_number);    {
484      fprintf(outfile, "%+3d ", cb->pattern_position);
485      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
486      }
487    else
488      {
489      if (callout_extra) fprintf(outfile, "    ");
490        else fprintf(outfile, "%3d ", cb->callout_number);
491      }
492    
493  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
494  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 318  if (post_start > 0) Line 499  if (post_start > 0)
499    fprintf(outfile, "^");    fprintf(outfile, "^");
500    }    }
501    
502  fprintf(outfile, "\n");  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
503      fprintf(outfile, " ");
504    
505    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
506      pbuffer + cb->pattern_position);
507    
508    fprintf(outfile, "\n");
509  first_callout = 0;  first_callout = 0;
510    
511  if ((int)(cb->callout_data) != 0)  if (cb->callout_data != NULL)
512    {    {
513    fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
514    return (int)(cb->callout_data);    if (callout_data != 0)
515        {
516        fprintf(outfile, "Callout data = %d\n", callout_data);
517        return callout_data;
518        }
519    }    }
520    
521  return (cb->callout_number != callout_fail_id)? 0 :  return (cb->callout_number != callout_fail_id)? 0 :
# Line 334  return (cb->callout_number != callout_fa Line 524  return (cb->callout_number != callout_fa
524    
525    
526  /*************************************************  /*************************************************
527  *            Local malloc function               *  *            Local malloc functions              *
528  *************************************************/  *************************************************/
529    
530  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
# Line 342  compiled re. */ Line 532  compiled re. */
532    
533  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
534  {  {
535    void *block = malloc(size);
536  gotten_store = size;  gotten_store = size;
537  return malloc(size);  if (show_malloc)
538      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
539    return block;
540    }
541    
542    static void new_free(void *block)
543    {
544    if (show_malloc)
545      fprintf(outfile, "free             %p\n", block);
546    free(block);
547  }  }
548    
549    
550    /* For recursion malloc/free, to test stacking calls */
551    
552    static void *stack_malloc(size_t size)
553    {
554    void *block = malloc(size);
555    if (show_malloc)
556      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
557    return block;
558    }
559    
560    static void stack_free(void *block)
561    {
562    if (show_malloc)
563      fprintf(outfile, "stack_free       %p\n", block);
564    free(block);
565    }
566    
567    
568  /*************************************************  /*************************************************
569  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
# Line 364  if ((rc = pcre_fullinfo(re, study, optio Line 581  if ((rc = pcre_fullinfo(re, study, optio
581    
582    
583  /*************************************************  /*************************************************
584    *         Byte flipping function                 *
585    *************************************************/
586    
/* Reverse the byte order of a 2-byte or a 4-byte quantity.

Arguments:
  value       the value whose low-order bytes are to be swapped
  n           2 selects a 2-byte swap; any other value gives a 4-byte swap

Returns:      the low-order 2 (or 4) bytes of value in reverse order; bits
                above those bytes do not contribute to the result
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;          /* least significant byte */
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
596    
597    
598    
599    
600    /*************************************************
601    *        Check match or recursion limit          *
602    *************************************************/
603    
604    static int
605    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
606      int start_offset, int options, int *use_offsets, int use_size_offsets,
607      int flag, unsigned long int *limit, int errnumber, const char *msg)
608    {
609    int count;
610    int min = 0;
611    int mid = 64;
612    int max = -1;
613    
614    extra->flags |= flag;
615    
616    for (;;)
617      {
618      *limit = mid;
619    
620      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
621        use_offsets, use_size_offsets);
622    
623      if (count == errnumber)
624        {
625        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
626        min = mid;
627        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
628        }
629    
630      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
631                             count == PCRE_ERROR_PARTIAL)
632        {
633        if (mid == min + 1)
634          {
635          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
636          break;
637          }
638        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
639        max = mid;
640        mid = (min + mid)/2;
641        }
642      else break;    /* Some other error */
643      }
644    
645    extra->flags &= ~flag;
646    return count;
647    }
648    
649    
650    
651    /*************************************************
652    *         Check newline indicator                *
653    *************************************************/
654    
655    /* This is used both at compile and run-time to check for <xxx> escapes, where
656    xxx is LF, CR, CRLF, or ANY. Print a message and return 0 if there is no match.
657    
658    Arguments:
659      p           points after the leading '<'
660      f           file for error message
661    
662    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
663    */
664    
665    static int
666    check_newline(uschar *p, FILE *f)
667    {
668    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
669    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
670    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
671    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
672    fprintf(f, "Unknown newline type at: <%s\n", p);
673    return 0;
674    }
675    
676    
677    
678    /*************************************************
679    *             Usage function                     *
680    *************************************************/
681    
/* Write the command-line summary to stdout. The -dfa and -p lines are left
out when the program is built with NODFA or NOPOSIX defined, respectively. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n",
      stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
707    
708    
709    
710    /*************************************************
711  *                Main Program                    *  *                Main Program                    *
712  *************************************************/  *************************************************/
713    
# Line 378  int options = 0; Line 722  int options = 0;
722  int study_options = 0;  int study_options = 0;
723  int op = 1;  int op = 1;
724  int timeit = 0;  int timeit = 0;
725    int timeitm = 0;
726  int showinfo = 0;  int showinfo = 0;
727  int showstore = 0;  int showstore = 0;
728    int quiet = 0;
729  int size_offsets = 45;  int size_offsets = 45;
730  int size_offsets_max;  int size_offsets_max;
731  int *offsets;  int *offsets = NULL;
732  #if !defined NOPOSIX  #if !defined NOPOSIX
733  int posix = 0;  int posix = 0;
734  #endif  #endif
735  int debug = 0;  int debug = 0;
736  int done = 0;  int done = 0;
737  unsigned char buffer[30000];  int all_use_dfa = 0;
738  unsigned char dbuffer[1024];  int yield = 0;
739    int stack_size;
740    
741    /* These vectors store, end-to-end, a list of captured substring names. Assume
742    that 1024 is plenty long enough for the few names we'll be testing. */
743    
744    uschar copynames[1024];
745    uschar getnames[1024];
746    
747    uschar *copynamesptr;
748    uschar *getnamesptr;
749    
750    /* Get buffers from malloc() so that Electric Fence will check their misuse
751    when I am debugging. They grow automatically when very long lines are read. */
752    
753  /* Static so that new_malloc can use it. */  buffer = (unsigned char *)malloc(buffer_size);
754    dbuffer = (unsigned char *)malloc(buffer_size);
755    pbuffer = (unsigned char *)malloc(buffer_size);
756    
757    /* The outfile variable is static so that new_malloc can use it. */
758    
759  outfile = stdout;  outfile = stdout;
760    
761    /* The following  _setmode() stuff is some Windows magic that tells its runtime
762    library to translate CRLF into a single LF character. At least, that's what
763    I've been told: never having used Windows I take this all on trust. Originally
764    it set 0x8000, but then I was advised that _O_BINARY was better. */
765    
766    #if defined(_WIN32) || defined(WIN32)
767    _setmode( _fileno( stdout ), _O_BINARY );
768    #endif
769    
770  /* Scan options */  /* Scan options */
771    
772  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 403  while (argc > 1 && argv[op][0] == '-') Line 775  while (argc > 1 && argv[op][0] == '-')
775    
776    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
777      showstore = 1;      showstore = 1;
778    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
779      else if (strcmp(argv[op], "-b") == 0) debug = 1;
780    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
781    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
782    #if !defined NODFA
783      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
784    #endif
785    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
786        ((size_offsets = get_value(argv[op+1], &endptr)), *endptr == 0))        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
787            *endptr == 0))
788        {
789        op++;
790        argc--;
791        }
792      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
793        {
794        int both = argv[op][2] == 0;
795        int temp;
796        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
797                         *endptr == 0))
798          {
799          timeitm = temp;
800          op++;
801          argc--;
802          }
803        else timeitm = LOOPREPEAT;
804        if (both) timeit = timeitm;
805        }
806      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
807          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
808            *endptr == 0))
809      {      {
810    #if defined(_WIN32) || defined(WIN32)
811        printf("PCRE: -S not supported on this OS\n");
812        exit(1);
813    #else
814        int rc;
815        struct rlimit rlim;
816        getrlimit(RLIMIT_STACK, &rlim);
817        rlim.rlim_cur = stack_size * 1024 * 1024;
818        rc = setrlimit(RLIMIT_STACK, &rlim);
819        if (rc != 0)
820          {
821        printf("PCRE: setrlimit() failed with error %d\n", rc);
822        exit(1);
823          }
824      op++;      op++;
825      argc--;      argc--;
826    #endif
827      }      }
828  #if !defined NOPOSIX  #if !defined NOPOSIX
829    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
# Line 422  while (argc > 1 && argv[op][0] == '-') Line 835  while (argc > 1 && argv[op][0] == '-')
835      printf("Compiled with\n");      printf("Compiled with\n");
836      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
837      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
838        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
839        printf("  %sUnicode properties support\n", rc? "" : "No ");
840      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
841      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
842          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
843          (rc == -1)? "ANY" : "???");
844      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
845      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
846      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
847      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
848      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
849      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
850        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
851        printf("  Default recursion depth limit = %d\n", rc);
852        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
853        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
854      exit(0);      exit(0);
855      }      }
856      else if (strcmp(argv[op], "-help") == 0 ||
857               strcmp(argv[op], "--help") == 0)
858        {
859        usage();
860        goto EXIT;
861        }
862    else    else
863      {      {
864      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
865      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
866      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
867      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
868      }      }
869    op++;    op++;
870    argc--;    argc--;
# Line 454  while (argc > 1 && argv[op][0] == '-') Line 873  while (argc > 1 && argv[op][0] == '-')
873  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
874    
875  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
876  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
877  if (offsets == NULL)  if (offsets == NULL)
878    {    {
879    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
880      size_offsets_max * sizeof(int));      size_offsets_max * sizeof(int));
881    return 1;    yield = 1;
882      goto EXIT;
883    }    }
884    
885  /* Sort out the input and output files */  /* Sort out the input and output files */
886    
887  if (argc > 1)  if (argc > 1)
888    {    {
889    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
890    if (infile == NULL)    if (infile == NULL)
891      {      {
892      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
893      return 1;      yield = 1;
894        goto EXIT;
895      }      }
896    }    }
897    
898  if (argc > 2)  if (argc > 2)
899    {    {
900    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
901    if (outfile == NULL)    if (outfile == NULL)
902      {      {
903      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
904      return 1;      yield = 1;
905        goto EXIT;
906      }      }
907    }    }
908    
909  /* Set alternative malloc function */  /* Set alternative malloc function */
910    
911  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
912    pcre_free = new_free;
913    pcre_stack_malloc = stack_malloc;
914    pcre_stack_free = stack_free;
915    
916  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
917    
918  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
919    
920  /* Main loop */  /* Main loop */
921    
# Line 506  while (!done) Line 931  while (!done)
931    
932    const char *error;    const char *error;
933    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
934      unsigned char *to_file = NULL;
935    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
936      unsigned long int true_size, true_study_size = 0;
937      size_t size, regex_gotten_store;
938    int do_study = 0;    int do_study = 0;
939    int do_debug = debug;    int do_debug = debug;
940    int do_G = 0;    int do_G = 0;
941    int do_g = 0;    int do_g = 0;
942    int do_showinfo = showinfo;    int do_showinfo = showinfo;
943    int do_showrest = 0;    int do_showrest = 0;
944    int erroroffset, len, delimiter;    int do_flip = 0;
945      int erroroffset, len, delimiter, poffset;
946    
947    utf8 = 0;    use_utf8 = 0;
948    
949    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
950    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
951    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
952    fflush(outfile);    fflush(outfile);
953    
# Line 526  while (!done) Line 955  while (!done)
955    while (isspace(*p)) p++;    while (isspace(*p)) p++;
956    if (*p == 0) continue;    if (*p == 0) continue;
957    
958    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
959    complete, read more. */  
960      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
961        {
962        unsigned long int magic, get_options;
963        uschar sbuf[8];
964        FILE *f;
965    
966        p++;
967        pp = p + (int)strlen((char *)p);
968        while (isspace(pp[-1])) pp--;
969        *pp = 0;
970    
971        f = fopen((char *)p, "rb");
972        if (f == NULL)
973          {
974          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
975          continue;
976          }
977    
978        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
979    
980        true_size =
981          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
982        true_study_size =
983          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
984    
985        re = (real_pcre *)new_malloc(true_size);
986        regex_gotten_store = gotten_store;
987    
988        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
989    
990        magic = ((real_pcre *)re)->magic_number;
991        if (magic != MAGIC_NUMBER)
992          {
993          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
994            {
995            do_flip = 1;
996            }
997          else
998            {
999            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1000            fclose(f);
1001            continue;
1002            }
1003          }
1004    
1005        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1006          do_flip? " (byte-inverted)" : "", p);
1007    
1008        /* Need to know if UTF-8 for printing data strings */
1009    
1010        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1011        use_utf8 = (get_options & PCRE_UTF8) != 0;
1012    
1013        /* Now see if there is any following study data */
1014    
1015        if (true_study_size != 0)
1016          {
1017          pcre_study_data *psd;
1018    
1019          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1020          extra->flags = PCRE_EXTRA_STUDY_DATA;
1021    
1022          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1023          extra->study_data = psd;
1024    
1025          if (fread(psd, 1, true_study_size, f) != true_study_size)
1026            {
1027            FAIL_READ:
1028            fprintf(outfile, "Failed to read data from %s\n", p);
1029            if (extra != NULL) new_free(extra);
1030            if (re != NULL) new_free(re);
1031            fclose(f);
1032            continue;
1033            }
1034          fprintf(outfile, "Study data loaded from %s\n", p);
1035          do_study = 1;     /* To get the data output if requested */
1036          }
1037        else fprintf(outfile, "No study data\n");
1038    
1039        fclose(f);
1040        goto SHOW_INFO;
1041        }
1042    
1043      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1044      the pattern; if it isn't complete, read more. */
1045    
1046    delimiter = *p++;    delimiter = *p++;
1047    
# Line 538  while (!done) Line 1052  while (!done)
1052      }      }
1053    
1054    pp = p;    pp = p;
1055      poffset = p - buffer;
1056    
1057    for(;;)    for(;;)
1058      {      {
# Line 548  while (!done) Line 1063  while (!done)
1063        pp++;        pp++;
1064        }        }
1065      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1066      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1067      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1068        {        {
1069        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1070        done = 1;        done = 1;
# Line 566  while (!done) Line 1073  while (!done)
1073      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1074      }      }
1075    
1076      /* The buffer may have moved while being extended; reset the start of data
1077      pointer to the correct relative point in the buffer. */
1078    
1079      p = buffer + poffset;
1080    
1081    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1082    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1083    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1084    
1085    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1086    
1087    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1088      for callouts. */
1089    
1090    *pp++ = 0;    *pp++ = 0;
1091      strcpy((char *)pbuffer, (char *)p);
1092    
1093    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1094    
# Line 586  while (!done) Line 1100  while (!done)
1100      {      {
1101      switch (*pp++)      switch (*pp++)
1102        {        {
1103          case 'f': options |= PCRE_FIRSTLINE; break;
1104        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1105        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1106        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 594  while (!done) Line 1109  while (!done)
1109    
1110        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1111        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1112          case 'B': do_debug = 1; break;
1113          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1114        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1115        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1116          case 'F': do_flip = 1; break;
1117        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1118        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1119          case 'J': options |= PCRE_DUPNAMES; break;
1120        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1121        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1122    
# Line 608  while (!done) Line 1127  while (!done)
1127        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1128        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1129        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1130        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1131          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1132    
1133        case 'L':        case 'L':
1134        ppp = pp;        ppp = pp;
1135        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1136          /* The '0' test is just in case this is an unterminated line. */
1137          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1138        *ppp = 0;        *ppp = 0;
1139        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1140          {          {
1141          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1142          goto SKIP_DATA;          goto SKIP_DATA;
1143          }          }
1144          locale_set = 1;
1145        tables = pcre_maketables();        tables = pcre_maketables();
1146        pp = ppp;        pp = ppp;
1147        break;        break;
1148    
1149        case '\n': case ' ': break;        case '>':
1150          to_file = pp;
1151          while (*pp != 0) pp++;
1152          while (isspace(pp[-1])) pp--;
1153          *pp = 0;
1154          break;
1155    
1156          case '<':
1157            {
1158            int x = check_newline(pp, outfile);
1159            if (x == 0) goto SKIP_DATA;
1160            options |= x;
1161            while (*pp++ != '>');
1162            }
1163          break;
1164    
1165          case '\r':                      /* So that it works in Windows */
1166          case '\n':
1167          case ' ':
1168          break;
1169    
1170        default:        default:
1171        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1172        goto SKIP_DATA;        goto SKIP_DATA;
# Line 639  while (!done) Line 1182  while (!done)
1182      {      {
1183      int rc;      int rc;
1184      int cflags = 0;      int cflags = 0;
1185    
1186      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1187      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1188        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1189        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1190        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1191    
1192      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1193    
1194      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 648  while (!done) Line 1196  while (!done)
1196    
1197      if (rc != 0)      if (rc != 0)
1198        {        {
1199        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1200        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1201        goto SKIP_DATA;        goto SKIP_DATA;
1202        }        }
# Line 660  while (!done) Line 1208  while (!done)
1208  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1209    
1210      {      {
1211      if (timeit)      if (timeit > 0)
1212        {        {
1213        register int i;        register int i;
1214        clock_t time_taken;        clock_t time_taken;
1215        clock_t start_time = clock();        clock_t start_time = clock();
1216        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1217          {          {
1218          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1219          if (re != NULL) free(re);          if (re != NULL) free(re);
1220          }          }
1221        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1222        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1223          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1224            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1225        }        }
1226    
# Line 689  while (!done) Line 1237  while (!done)
1237          {          {
1238          for (;;)          for (;;)
1239            {            {
1240            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1241              {              {
1242              done = 1;              done = 1;
1243              goto CONTINUE;              goto CONTINUE;
# Line 713  while (!done) Line 1261  while (!done)
1261                sizeof(real_pcre) -                sizeof(real_pcre) -
1262                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1263    
1264        /* Extract the size for possible writing before possibly flipping it,
1265        and remember the store that was got. */
1266    
1267        true_size = ((real_pcre *)re)->size;
1268        regex_gotten_store = gotten_store;
1269    
1270        /* If /S was present, study the regexp to generate additional info to
1271        help with the matching. */
1272    
1273        if (do_study)
1274          {
1275          if (timeit > 0)
1276            {
1277            register int i;
1278            clock_t time_taken;
1279            clock_t start_time = clock();
1280            for (i = 0; i < timeit; i++)
1281              extra = pcre_study(re, study_options, &error);
1282            time_taken = clock() - start_time;
1283            if (extra != NULL) free(extra);
1284            fprintf(outfile, "  Study time %.4f milliseconds\n",
1285              (((double)time_taken * 1000.0) / (double)timeit) /
1286                (double)CLOCKS_PER_SEC);
1287            }
1288          extra = pcre_study(re, study_options, &error);
1289          if (error != NULL)
1290            fprintf(outfile, "Failed to study: %s\n", error);
1291          else if (extra != NULL)
1292            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1293          }
1294    
1295        /* If the 'F' option was present, we flip the bytes of all the integer
1296        fields in the regex data block and the study block. This is to make it
1297        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1298        compiled on a different architecture. */
1299    
1300        if (do_flip)
1301          {
1302          real_pcre *rre = (real_pcre *)re;
1303          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1304          rre->size = byteflip(rre->size, sizeof(rre->size));
1305          rre->options = byteflip(rre->options, sizeof(rre->options));
1306          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1307          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1308          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1309          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1310          rre->name_table_offset = byteflip(rre->name_table_offset,
1311            sizeof(rre->name_table_offset));
1312          rre->name_entry_size = byteflip(rre->name_entry_size,
1313            sizeof(rre->name_entry_size));
1314          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1315    
1316          if (extra != NULL)
1317            {
1318            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1319            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1320            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1321            }
1322          }
1323    
1324        /* Extract information from the compiled data if required */
1325    
1326        SHOW_INFO:
1327    
1328        if (do_debug)
1329          {
1330          fprintf(outfile, "------------------------------------------------------------------\n");
1331          pcre_printint(re, outfile);
1332          }
1333    
1334      if (do_showinfo)      if (do_showinfo)
1335        {        {
1336        unsigned long int get_options;        unsigned long int get_options, all_options;
1337    #if !defined NOINFOCHECK
1338        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1339    #endif
1340        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1341        int nameentrysize, namecount;        int nameentrysize, namecount;
1342        const uschar *nametable;        const uschar *nametable;
       size_t size;  
   
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
1343    
1344        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1345        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
# Line 736  while (!done) Line 1349  while (!done)
1349        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1350        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1351        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1352        new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1353    
1354    #if !defined NOINFOCHECK
1355        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1356        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1357          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 755  while (!done) Line 1369  while (!done)
1369            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1370              get_options, old_options);              get_options, old_options);
1371          }          }
1372    #endif
1373    
1374        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1375          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1376          size, gotten_store);          (int)size, (int)regex_gotten_store);
1377    
1378        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1379        if (backrefmax > 0)        if (backrefmax > 0)
# Line 776  while (!done) Line 1391  while (!done)
1391            }            }
1392          }          }
1393    
1394          /* The NOPARTIAL bit is a private bit in the options, so we have
1395          to fish it out via our back door */
1396    
1397          all_options = ((real_pcre *)re)->options;
1398          if (do_flip)
1399            {
1400            all_options = byteflip(all_options, sizeof(all_options));
1401             }
1402    
1403          if ((all_options & PCRE_NOPARTIAL) != 0)
1404            fprintf(outfile, "Partial matching not supported\n");
1405    
1406        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1407          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1408            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1409            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1410            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1411            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1412              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1413            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1414            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1415            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1416            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1417            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1418              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1419              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1420              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1421    
1422        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        switch (get_options & PCRE_NEWLINE_BITS)
1423          fprintf(outfile, "Case state changes\n");          {
1424            case PCRE_NEWLINE_CR:
1425            fprintf(outfile, "Forced newline sequence: CR\n");
1426            break;
1427    
1428            case PCRE_NEWLINE_LF:
1429            fprintf(outfile, "Forced newline sequence: LF\n");
1430            break;
1431    
1432            case PCRE_NEWLINE_CRLF:
1433            fprintf(outfile, "Forced newline sequence: CRLF\n");
1434            break;
1435    
1436            case PCRE_NEWLINE_ANY:
1437            fprintf(outfile, "Forced newline sequence: ANY\n");
1438            break;
1439    
1440            default:
1441            break;
1442            }
1443    
1444        if (first_char == -1)        if (first_char == -1)
1445          {          {
1446          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1447          }          }
1448        else if (first_char < 0)        else if (first_char < 0)
1449          {          {
# Line 802  while (!done) Line 1452  while (!done)
1452        else        else
1453          {          {
1454          int ch = first_char & 255;          int ch = first_char & 255;
1455          char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1456            "" : " (caseless)";            "" : " (caseless)";
1457          if (isprint(ch))          if (PRINTHEX(ch))
1458            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1459          else          else
1460            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 817  while (!done) Line 1467  while (!done)
1467        else        else
1468          {          {
1469          int ch = need_char & 255;          int ch = need_char & 255;
1470          char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1471            "" : " (caseless)";            "" : " (caseless)";
1472          if (isprint(ch))          if (PRINTHEX(ch))
1473            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1474          else          else
1475            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1476          }          }
       }  
1477    
1478      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1479      help with the matching. */        value, but it varies, depending on the computer architecture, and
1480          so messes up the test suite. (And with the /F option, it might be
1481          flipped.) */
1482    
1483      if (do_study)        if (do_study)
       {  
       if (timeit)  
1484          {          {
1485          register int i;          if (extra == NULL)
1486          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1487          clock_t start_time = clock();          else
1488          for (i = 0; i < LOOPREPEAT; i++)            {
1489            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1490          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1491          if (extra != NULL) free(extra);  
1492          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1493            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /              fprintf(outfile, "No starting byte set\n");
1494              (double)CLOCKS_PER_SEC);            else
1495                {
1496                int i;
1497                int c = 24;
1498                fprintf(outfile, "Starting byte set: ");
1499                for (i = 0; i < 256; i++)
1500                  {
1501                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1502                    {
1503                    if (c > 75)
1504                      {
1505                      fprintf(outfile, "\n  ");
1506                      c = 2;
1507                      }
1508                    if (PRINTHEX(i) && i != ' ')
1509                      {
1510                      fprintf(outfile, "%c ", i);
1511                      c += 2;
1512                      }
1513                    else
1514                      {
1515                      fprintf(outfile, "\\x%02x ", i);
1516                      c += 5;
1517                      }
1518                    }
1519                  }
1520                fprintf(outfile, "\n");
1521                }
1522              }
1523          }          }
1524          }
1525    
1526        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1527        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1528          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1529    
1530        else if (do_showinfo)      if (to_file != NULL)
1531          {
1532          FILE *f = fopen((char *)to_file, "wb");
1533          if (f == NULL)
1534          {          {
1535          size_t size;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1536          uschar *start_bits = NULL;          }
1537          new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);        else
1538          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          {
1539          fprintf(outfile, "Study size = %d\n", size);          uschar sbuf[8];
1540          if (start_bits == NULL)          sbuf[0] = (true_size >> 24)  & 255;
1541            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1542            sbuf[2] = (true_size >>  8)  & 255;
1543            sbuf[3] = (true_size)  & 255;
1544    
1545            sbuf[4] = (true_study_size >> 24)  & 255;
1546            sbuf[5] = (true_study_size >> 16)  & 255;
1547            sbuf[6] = (true_study_size >>  8)  & 255;
1548            sbuf[7] = (true_study_size)  & 255;
1549    
1550            if (fwrite(sbuf, 1, 8, f) < 8 ||
1551                fwrite(re, 1, true_size, f) < true_size)
1552              {
1553              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1554              }
1555          else          else
1556            {            {
1557            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1558            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1559              {              {
1560              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1561                    true_study_size)
1562                {                {
1563                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1564                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1565                }                }
1566                else fprintf(outfile, "Study data written to %s\n", to_file);
1567    
1568              }              }
           fprintf(outfile, "\n");  
1569            }            }
1570            fclose(f);
1571          }          }
1572    
1573          new_free(re);
1574          if (extra != NULL) new_free(extra);
1575          if (tables != NULL) new_free((void *)tables);
1576          continue;  /* With next regex */
1577        }        }
1578      }      }        /* End of non-POSIX compile */
1579    
1580    /* Read data lines and test them */    /* Read data lines and test them */
1581    
1582    for (;;)    for (;;)
1583      {      {
1584      unsigned char *q;      uschar *q;
1585      unsigned char *bptr = dbuffer;      uschar *bptr = dbuffer;
1586      int *use_offsets = offsets;      int *use_offsets = offsets;
1587      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1588      int callout_data = 0;      int callout_data = 0;
# Line 910  while (!done) Line 1595  while (!done)
1595      int gmatched = 0;      int gmatched = 0;
1596      int start_offset = 0;      int start_offset = 0;
1597      int g_notempty = 0;      int g_notempty = 0;
1598        int use_dfa = 0;
1599    
1600      options = 0;      options = 0;
1601    
1602        *copynames = 0;
1603        *getnames = 0;
1604    
1605        copynamesptr = copynames;
1606        getnamesptr = getnames;
1607    
1608      pcre_callout = callout;      pcre_callout = callout;
1609      first_callout = 1;      first_callout = 1;
1610      callout_extra = 0;      callout_extra = 0;
1611      callout_count = 0;      callout_count = 0;
1612      callout_fail_count = 999999;      callout_fail_count = 999999;
1613      callout_fail_id = -1;      callout_fail_id = -1;
1614        show_malloc = 0;
1615    
1616        if (extra != NULL) extra->flags &=
1617          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1618    
1619      if (infile == stdin) printf("data> ");      len = 0;
1620      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      for (;;)
1621        {        {
1622        done = 1;        if (infile == stdin) printf("data> ");
1623        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1624            {
1625            if (len > 0) break;
1626            done = 1;
1627            goto CONTINUE;
1628            }
1629          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1630          len = (int)strlen((char *)buffer);
1631          if (buffer[len-1] == '\n') break;
1632        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1633    
     len = (int)strlen((char *)buffer);  
1634      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1635      buffer[len] = 0;      buffer[len] = 0;
1636      if (len == 0) break;      if (len == 0) break;
# Line 958  while (!done) Line 1660  while (!done)
1660          c -= '0';          c -= '0';
1661          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1662            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1663    
1664    #if !defined NOUTF8
1665            if (use_utf8 && c > 255)
1666              {
1667              unsigned char buff8[8];
1668              int ii, utn;
1669              utn = ord2utf8(c, buff8);
1670              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1671              c = buff8[ii];   /* Last byte */
1672              }
1673    #endif
1674          break;          break;
1675    
1676          case 'x':          case 'x':
1677    
1678          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1679    
1680    #if !defined NOUTF8
1681          if (*p == '{')          if (*p == '{')
1682            {            {
1683            unsigned char *pt = p;            unsigned char *pt = p;
# Line 972  while (!done) Line 1686  while (!done)
1686              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1687            if (*pt == '}')            if (*pt == '}')
1688              {              {
1689              unsigned char buffer[8];              unsigned char buff8[8];
1690              int ii, utn;              int ii, utn;
1691              utn = ord2utf8(c, buffer);              utn = ord2utf8(c, buff8);
1692              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1693              c = buffer[ii];   /* Last byte */              c = buff8[ii];   /* Last byte */
1694              p = pt + 1;              p = pt + 1;
1695              break;              break;
1696              }              }
1697            /* Not correct form; fall through */            /* Not correct form; fall through */
1698            }            }
1699    #endif
1700    
1701          /* Ordinary \x */          /* Ordinary \x */
1702    
# Line 993  while (!done) Line 1708  while (!done)
1708            }            }
1709          break;          break;
1710    
1711          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1712          p--;          p--;
1713          continue;          continue;
1714    
1715            case '>':
1716            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1717            continue;
1718    
1719          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1720          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1721          continue;          continue;
# Line 1013  while (!done) Line 1732  while (!done)
1732            }            }
1733          else if (isalnum(*p))          else if (isalnum(*p))
1734            {            {
1735            uschar name[256];            uschar *npp = copynamesptr;
1736            uschar *pp = name;            while (isalnum(*p)) *npp++ = *p++;
1737            while (isalnum(*p)) *pp++ = *p++;            *npp++ = 0;
1738            *pp = 0;            *npp = 0;
1739            n = pcre_get_stringnumber(re, name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1740            if (n < 0)            if (n < 0)
1741              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1742            else copystrings |= 1 << n;            copynamesptr = npp;
1743            }            }
1744          else if (*p == '+')          else if (*p == '+')
1745            {            {
# Line 1058  while (!done) Line 1777  while (!done)
1777            }            }
1778          continue;          continue;
1779    
1780    #if !defined NODFA
1781            case 'D':
1782    #if !defined NOPOSIX
1783            if (posix || do_posix)
1784              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1785            else
1786    #endif
1787              use_dfa = 1;
1788            continue;
1789    
1790            case 'F':
1791            options |= PCRE_DFA_SHORTEST;
1792            continue;
1793    #endif
1794    
1795          case 'G':          case 'G':
1796          if (isdigit(*p))          if (isdigit(*p))
1797            {            {
# Line 1066  while (!done) Line 1800  while (!done)
1800            }            }
1801          else if (isalnum(*p))          else if (isalnum(*p))
1802            {            {
1803            uschar name[256];            uschar *npp = getnamesptr;
1804            uschar *pp = name;            while (isalnum(*p)) *npp++ = *p++;
1805            while (isalnum(*p)) *pp++ = *p++;            *npp++ = 0;
1806            *pp = 0;            *npp = 0;
1807            n = pcre_get_stringnumber(re, name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1808            if (n < 0)            if (n < 0)
1809              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1810            else getstrings |= 1 << n;            getnamesptr = npp;
1811            }            }
1812          continue;          continue;
1813    
# Line 1095  while (!done) Line 1829  while (!done)
1829            {            {
1830            size_offsets_max = n;            size_offsets_max = n;
1831            free(offsets);            free(offsets);
1832            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1833            if (offsets == NULL)            if (offsets == NULL)
1834              {              {
1835              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1836                size_offsets_max * sizeof(int));                size_offsets_max * sizeof(int));
1837              return 1;              yield = 1;
1838                goto EXIT;
1839              }              }
1840            }            }
1841          use_size_offsets = n;          use_size_offsets = n;
1842          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1843          continue;          continue;
1844    
1845            case 'P':
1846            options |= PCRE_PARTIAL;
1847            continue;
1848    
1849            case 'Q':
1850            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1851            if (extra == NULL)
1852              {
1853              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1854              extra->flags = 0;
1855              }
1856            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1857            extra->match_limit_recursion = n;
1858            continue;
1859    
1860            case 'q':
1861            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1862            if (extra == NULL)
1863              {
1864              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1865              extra->flags = 0;
1866              }
1867            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1868            extra->match_limit = n;
1869            continue;
1870    
1871    #if !defined NODFA
1872            case 'R':
1873            options |= PCRE_DFA_RESTART;
1874            continue;
1875    #endif
1876    
1877            case 'S':
1878            show_malloc = 1;
1879            continue;
1880    
1881          case 'Z':          case 'Z':
1882          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1883          continue;          continue;
1884    
1885            case '?':
1886            options |= PCRE_NO_UTF8_CHECK;
1887            continue;
1888    
1889            case '<':
1890              {
1891              int x = check_newline(p, outfile);
1892              if (x == 0) goto NEXT_DATA;
1893              options |= x;
1894              while (*p++ != '>');
1895              }
1896            continue;
1897          }          }
1898        *q++ = c;        *q++ = c;
1899        }        }
1900      *q = 0;      *q = 0;
1901      len = q - dbuffer;      len = q - dbuffer;
1902    
1903        if ((all_use_dfa || use_dfa) && find_match_limit)
1904          {
1905          printf("**Match limit not relevant for DFA matching: ignored\n");
1906          find_match_limit = 0;
1907          }
1908    
1909      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1910      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1911    
# Line 1126  while (!done) Line 1916  while (!done)
1916        int eflags = 0;        int eflags = 0;
1917        regmatch_t *pmatch = NULL;        regmatch_t *pmatch = NULL;
1918        if (use_size_offsets > 0)        if (use_size_offsets > 0)
1919          pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1920        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1921        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1922    
# Line 1134  while (!done) Line 1924  while (!done)
1924    
1925        if (rc != 0)        if (rc != 0)
1926          {          {
1927          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1928          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1929          }          }
1930          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1931                  != 0)
1932            {
1933            fprintf(outfile, "Matched with REG_NOSUB\n");
1934            }
1935        else        else
1936          {          {
1937          size_t i;          size_t i;
# Line 1168  while (!done) Line 1963  while (!done)
1963    
1964      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1965        {        {
1966        if (timeit)        if (timeitm > 0)
1967          {          {
1968          register int i;          register int i;
1969          clock_t time_taken;          clock_t time_taken;
1970          clock_t start_time = clock();          clock_t start_time = clock();
1971          for (i = 0; i < LOOPREPEAT; i++)  
1972    #if !defined NODFA
1973            if (all_use_dfa || use_dfa)
1974              {
1975              int workspace[1000];
1976              for (i = 0; i < timeitm; i++)
1977                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1978                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1979                  sizeof(workspace)/sizeof(int));
1980              }
1981            else
1982    #endif
1983    
1984            for (i = 0; i < timeitm; i++)
1985            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1986              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1987    
1988          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1989          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
1990            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
1991              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
1992          }          }
1993    
1994        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
1995        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
1996          for the recursion limit. */
1997    
1998        if (find_match_limit)        if (find_match_limit)
1999          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2000          if (extra == NULL)          if (extra == NULL)
2001            {            {
2002            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2003            extra->flags = 0;            extra->flags = 0;
2004            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
2005    
2006          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
2007            {            options|g_notempty, use_offsets, use_size_offsets,
2008            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2009            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
2010              options | g_notempty, use_offsets, use_size_offsets);  
2011            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
2012              {            options|g_notempty, use_offsets, use_size_offsets,
2013              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2014              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
2015          }          }
2016    
2017        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1232  while (!done) Line 2020  while (!done)
2020          {          {
2021          if (extra == NULL)          if (extra == NULL)
2022            {            {
2023            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2024            extra->flags = 0;            extra->flags = 0;
2025            }            }
2026          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2027          extra->callout_data = (void *)callout_data;          extra->callout_data = &callout_data;
2028          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2029            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
2030          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
# Line 1245  while (!done) Line 2033  while (!done)
2033        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2034        value of match_limit. */        value of match_limit. */
2035    
2036        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
2037          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
2038            {
2039            int workspace[1000];
2040            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2041              options | g_notempty, use_offsets, use_size_offsets, workspace,
2042              sizeof(workspace)/sizeof(int));
2043            if (count == 0)
2044              {
2045              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2046              count = use_size_offsets/2;
2047              }
2048            }
2049    #endif
2050    
2051        if (count == 0)        else
2052          {          {
2053          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2054          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2055            if (count == 0)
2056              {
2057              fprintf(outfile, "Matched, but too many substrings\n");
2058              count = use_size_offsets/3;
2059              }
2060          }          }
2061    
2062        /* Matched */        /* Matched */
2063    
2064        if (count >= 0)        if (count >= 0)
2065          {          {
2066          int i;          int i, maxcount;
2067    
2068    #if !defined NODFA
2069            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2070    #endif
2071              maxcount = use_size_offsets/3;
2072    
2073            /* This is a check against a lunatic return value. */
2074    
2075            if (count > maxcount)
2076              {
2077              fprintf(outfile,
2078                "** PCRE error: returned count %d is too big for offset size %d\n",
2079                count, use_size_offsets);
2080              count = use_size_offsets/3;
2081              if (do_g || do_G)
2082                {
2083                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2084                do_g = do_G = FALSE;        /* Break g/G loop */
2085                }
2086              }
2087    
2088          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2089            {            {
2090            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1286  while (!done) Line 2112  while (!done)
2112            {            {
2113            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2114              {              {
2115              char copybuffer[16];              char copybuffer[256];
2116              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2117                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2118              if (rc < 0)              if (rc < 0)
# Line 1296  while (!done) Line 2122  while (!done)
2122              }              }
2123            }            }
2124    
2125            for (copynamesptr = copynames;
2126                 *copynamesptr != 0;
2127                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2128              {
2129              char copybuffer[256];
2130              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2131                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2132              if (rc < 0)
2133                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2134              else
2135                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2136              }
2137    
2138          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2139            {            {
2140            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1308  while (!done) Line 2147  while (!done)
2147              else              else
2148                {                {
2149                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2150                pcre_free_substring(substring);                pcre_free_substring(substring);
2151                }                }
2152              }              }
2153            }            }
2154    
2155            for (getnamesptr = getnames;
2156                 *getnamesptr != 0;
2157                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2158              {
2159              const char *substring;
2160              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2161                count, (char *)getnamesptr, &substring);
2162              if (rc < 0)
2163                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2164              else
2165                {
2166                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2167                pcre_free_substring(substring);
2168                }
2169              }
2170    
2171          if (getlist)          if (getlist)
2172            {            {
2173            const char **stringlist;            const char **stringlist;
# Line 1333  while (!done) Line 2187  while (!done)
2187            }            }
2188          }          }
2189    
2190          /* There was a partial match */
2191    
2192          else if (count == PCRE_ERROR_PARTIAL)
2193            {
2194            fprintf(outfile, "Partial match");
2195    #if !defined NODFA
2196            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2197              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2198                bptr + use_offsets[0]);
2199    #endif
2200            fprintf(outfile, "\n");
2201            break;  /* Out of the /g loop */
2202            }
2203    
2204        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2205        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
2206        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
2207        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
2208        was checked before setting g_notempty. */        offset values to achieve this. We won't be at the end of the string -
2209          that was checked before setting g_notempty. */
2210    
2211        else        else
2212          {          {
2213          if (g_notempty != 0)          if (g_notempty != 0)
2214            {            {
2215              int onechar = 1;
2216            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2217            use_offsets[1] = start_offset + 1;            if (use_utf8)
2218                {
2219                while (start_offset + onechar < len)
2220                  {
2221                  int tb = bptr[start_offset+onechar];
2222                  if (tb <= 127) break;
2223                  tb &= 0xc0;
2224                  if (tb != 0 && tb != 0xc0) onechar++;
2225                  }
2226                }
2227              use_offsets[1] = start_offset + onechar;
2228            }            }
2229          else          else
2230            {            {
2231            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2232              {              {
2233              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2234              }              }
2235              else fprintf(outfile, "Error %d\n", count);
2236            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2237            }            }
2238          }          }
# Line 1387  while (!done) Line 2267  while (!done)
2267          len -= use_offsets[1];          len -= use_offsets[1];
2268          }          }
2269        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2270    
2271        NEXT_DATA: continue;
2272      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2273    
2274    CONTINUE:    CONTINUE:
# Line 1395  while (!done) Line 2277  while (!done)
2277    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2278  #endif  #endif
2279    
2280    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2281    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2282    if (tables != NULL)    if (tables != NULL)
2283      {      {
2284      free((void *)tables);      new_free((void *)tables);
2285      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2286        locale_set = 0;
2287      }      }
2288    }    }
2289    
2290  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2291  return 0;  
2292    EXIT:
2293    
2294    if (infile != NULL && infile != stdin) fclose(infile);
2295    if (outfile != NULL && outfile != stdout) fclose(outfile);
2296    
2297    free(buffer);
2298    free(dbuffer);
2299    free(pbuffer);
2300    free(offsets);
2301    
2302    return yield;
2303  }  }
2304    
2305  /* End */  /* End of pcretest.c */

Legend:
Removed from v.63  
changed lines
  Added in v.96

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12