/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 73 by nigel, Sat Feb 24 21:40:30 2007 UTC revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include <config.h>
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 46  been extended and consequently is now ra
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    
52  /* We need the internal info for displaying the results of pcre_study(). Also  /* A number of things vary for Windows builds. Originally, pcretest opened its
53  for getting the opcodes for showing compiled code. */  input and output without "b"; then I was told that "b" was needed in some
54    environments, so it was added for release 5.0 to both the input and output. (It
55    makes no difference on Unix-like systems.) Later I was told that it is wrong
56    for the input on Windows. I've now abstracted the modes into two macros that
57    are set here, to make it easier to fiddle with them, and removed "b" from the
58    input mode under Windows. */
59    
60    #if defined(_WIN32) || defined(WIN32)
61    #include <io.h>                /* For _setmode() */
62    #include <fcntl.h>             /* For _O_BINARY */
63    #define INPUT_MODE   "r"
64    #define OUTPUT_MODE  "wb"
65    
66    #else
67    #include <sys/time.h>          /* These two includes are needed */
68    #include <sys/resource.h>      /* for setrlimit(). */
69    #define INPUT_MODE   "rb"
70    #define OUTPUT_MODE  "wb"
71    #endif
72    
73    
74    /* We have to include pcre_internal.h because we need the internal info for
75    displaying the results of pcre_study() and we also need to know about the
76    internal macros, structures, and other internal data values; pcretest has
77    "inside information" compared to a program that strictly follows the PCRE API.
78    
79    Although pcre_internal.h does itself include pcre.h, we explicitly include it
80    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81    appropriately for an application, not for building PCRE. */
82    
83    #include "pcre.h"
84    #include "pcre_internal.h"
85    
86    /* We need access to the data tables that PCRE uses. So as not to have to keep
87    two copies, we include the source file here, changing the names of the external
88    symbols to prevent clashes. */
89    
90    #define _pcre_utf8_table1      utf8_table1
91    #define _pcre_utf8_table1_size utf8_table1_size
92    #define _pcre_utf8_table2      utf8_table2
93    #define _pcre_utf8_table3      utf8_table3
94    #define _pcre_utf8_table4      utf8_table4
95    #define _pcre_utt              utt
96    #define _pcre_utt_size         utt_size
97    #define _pcre_OP_lengths       OP_lengths
98    
99    #include "pcre_tables.c"
100    
101    /* We also need the pcre_printint() function for printing out compiled
102    patterns. This function is in a separate file so that it can be included in
103    pcre_compile.c when that module is compiled with debugging enabled.
104    
105    The definition of the macro PRINTABLE, which determines whether to print an
106    output character as-is or as a hex value when showing compiled patterns, is
107    contained in this file. We use it here also, in cases when the locale has not
108    been explicitly changed, so as to get consistent output from systems that
109    differ in their output from isprint() even in the "C" locale. */
110    
111    #include "pcre_printint.src"
112    
113    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
 #include "internal.h"  
115    
116  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
117  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 121  Makefile. */
121  #include "pcreposix.h"  #include "pcreposix.h"
122  #endif  #endif
123    
124    /* It is also possible, for the benefit of the version currently imported into
125    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126    interface to the DFA matcher (NODFA), and without the doublecheck of the old
127    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128    UTF8 support if PCRE is built without it. */
129    
130    #ifndef SUPPORT_UTF8
131    #ifndef NOUTF8
132    #define NOUTF8
133    #endif
134    #endif
135    
136    
137    /* Other parameters */
138    
139  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
140  #ifdef CLK_TCK  #ifdef CLK_TCK
141  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 144  Makefile. */
144  #endif  #endif
145  #endif  #endif
146    
147  #define LOOPREPEAT 50000  /* This is the default loop count for timing. */
148    
149  #define BUFFER_SIZE 30000  #define LOOPREPEAT 500000
 #define DBUFFER_SIZE BUFFER_SIZE  
150    
151    /* Static variables */
152    
153  static FILE *outfile;  static FILE *outfile;
154  static int log_store = 0;  static int log_store = 0;
# Line 48  static int callout_extra; Line 157  static int callout_extra;
157  static int callout_fail_count;  static int callout_fail_count;
158  static int callout_fail_id;  static int callout_fail_id;
159  static int first_callout;  static int first_callout;
160    static int locale_set = 0;
161  static int show_malloc;  static int show_malloc;
162  static int use_utf8;  static int use_utf8;
163  static size_t gotten_store;  static size_t gotten_store;
164    
165    /* The buffers grow automatically if very long input lines are encountered. */
166    
167  static const int utf8_table1[] = {  static int buffer_size = 50000;
168    0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  static uschar *buffer = NULL;
169    static uschar *dbuffer = NULL;
170  static const int utf8_table2[] = {  static uschar *pbuffer = NULL;
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
   
 static const int utf8_table3[] = {  
   0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  
171    
172    
173    
174  /*************************************************  /*************************************************
175  *         Print compiled regex                   *  *        Read or extend an input line            *
176  *************************************************/  *************************************************/
177    
178  /* The code for doing this is held in a separate file that is also included in  /* Input lines are read into buffer, but both patterns and data lines can be
179  pcre.c when it is compiled with the debug switch. It defines a function called  continued over multiple input lines. In addition, if the buffer fills up, we
180  print_internals(), which uses a table of opcode lengths defined by the macro  want to automatically expand it so as to be able to handle extremely large
181  OP_LENGTHS, whose name must be OP_lengths. */  lines that are needed for certain stress tests. When the input buffer is
182    expanded, the other two buffers must also be expanded likewise, and the
183    contents of pbuffer, which are a copy of the input for callouts, must be
184    preserved (for when expansion happens for a data line). This is not the most
185    optimal way of handling this, but hey, this is just a test program!
186    
187    Arguments:
188      f            the file to read
189      start        where in buffer to start (this *must* be within buffer)
190    
191    Returns:       pointer to the start of new data
192                   could be a copy of start, or could be moved
193                   NULL if no data read and EOF reached
194    */
195    
196    static uschar *
197    extend_inputline(FILE *f, uschar *start)
198    {
199    uschar *here = start;
200    
201    for (;;)
202      {
203      int rlen = buffer_size - (here - buffer);
204    
205      if (rlen > 1000)
206        {
207        int dlen;
208        if (fgets((char *)here, rlen,  f) == NULL)
209          return (here == start)? NULL : start;
210        dlen = (int)strlen((char *)here);
211        if (dlen > 0 && here[dlen - 1] == '\n') return start;
212        here += dlen;
213        }
214    
215      else
216        {
217        int new_buffer_size = 2*buffer_size;
218        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
219        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
220        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
221    
222        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
223          {
224          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
225          exit(1);
226          }
227    
228        memcpy(new_buffer, buffer, buffer_size);
229        memcpy(new_pbuffer, pbuffer, buffer_size);
230    
231        buffer_size = new_buffer_size;
232    
233        start = new_buffer + (start - buffer);
234        here = new_buffer + (here - buffer);
235    
236        free(buffer);
237        free(dbuffer);
238        free(pbuffer);
239    
240        buffer = new_buffer;
241        dbuffer = new_dbuffer;
242        pbuffer = new_pbuffer;
243        }
244      }
245    
246    return NULL;  /* Control never gets here */
247    }
248    
249    
250    
 static uschar OP_lengths[] = { OP_LENGTHS };  
251    
 #include "printint.c"  
252    
253    
254    
# Line 85  static uschar OP_lengths[] = { OP_LENGTH Line 258  static uschar OP_lengths[] = { OP_LENGTH
258    
259  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
260  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
261  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
262    
263  Arguments:  Arguments:
264    str           string to be converted    str           string to be converted
# Line 106  return(result); Line 279  return(result);
279    
280    
281    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
282    
283  /*************************************************  /*************************************************
284  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 151  return i + 1; Line 288  return i + 1;
288  and returns the value of the character.  and returns the value of the character.
289    
290  Argument:  Argument:
291    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
292    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
293    
294  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
295             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
296  */  */
297    
298    #if !defined NOUTF8
299    
300  static int  static int
301  utf82ord(unsigned char *buffer, int *vptr)  utf82ord(unsigned char *utf8bytes, int *vptr)
302  {  {
303  int c = *buffer++;  int c = *utf8bytes++;
304  int d = c;  int d = c;
305  int i, j, s;  int i, j, s;
306    
# Line 181  d = (c & utf8_table3[i]) << s; Line 320  d = (c & utf8_table3[i]) << s;
320    
321  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
322    {    {
323    c = *buffer++;    c = *utf8bytes++;
324    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
325    s -= 6;    s -= 6;
326    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 189  for (j = 0; j < i; j++) Line 328  for (j = 0; j < i; j++)
328    
329  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
330    
331  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
332    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
333  if (j != i) return -(i+1);  if (j != i) return -(i+1);
334    
# Line 199  if (j != i) return -(i+1); Line 338  if (j != i) return -(i+1);
338  return i+1;  return i+1;
339  }  }
340    
341    #endif
342    
343    
344    
345    /*************************************************
346    *       Convert character value to UTF-8         *
347    *************************************************/
348    
349    /* This function takes an integer value in the range 0 - 0x7fffffff
350    and encodes it as a UTF-8 character in 0 to 6 bytes.
351    
352    Arguments:
353      cvalue     the character value
354      utf8bytes  pointer to buffer for result - at least 6 bytes long
355    
356    Returns:     number of characters placed in the buffer
357    */
358    
359    #if !defined NOUTF8
360    
361    static int
362    ord2utf8(int cvalue, uschar *utf8bytes)
363    {
364    register int i, j;
365    for (i = 0; i < utf8_table1_size; i++)
366      if (cvalue <= utf8_table1[i]) break;
367    utf8bytes += i;
368    for (j = i; j > 0; j--)
369     {
370     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
371     cvalue >>= 6;
372     }
373    *utf8bytes = utf8_table2[i] | cvalue;
374    return i + 1;
375    }
376    
377    #endif
378    
379    
380    
381  /*************************************************  /*************************************************
# Line 211  chars without printing. */ Line 388  chars without printing. */
388    
389  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
390  {  {
391  int c;  int c = 0;
392  int yield = 0;  int yield = 0;
393    
394  while (length-- > 0)  while (length-- > 0)
395    {    {
396    #if !defined NOUTF8
397    if (use_utf8)    if (use_utf8)
398      {      {
399      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
# Line 224  while (length-- > 0) Line 402  while (length-- > 0)
402        {        {
403        length -= rc - 1;        length -= rc - 1;
404        p += rc;        p += rc;
405        if (c < 256 && isprint(c))        if (PRINTHEX(c))
406          {          {
407          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
408          yield++;          yield++;
409          }          }
410        else        else
411          {          {
412          int n;          int n = 4;
413          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
414          yield += n;          yield += (n <= 0x000000ff)? 2 :
415                     (n <= 0x00000fff)? 3 :
416                     (n <= 0x0000ffff)? 4 :
417                     (n <= 0x000fffff)? 5 : 6;
418          }          }
419        continue;        continue;
420        }        }
421      }      }
422    #endif
423    
424     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
425    
426    if (isprint(c = *(p++)))    c = *p++;
427      if (PRINTHEX(c))
428      {      {
429      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
430      yield++;      yield++;
# Line 269  data is not zero. */ Line 452  data is not zero. */
452  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
453  {  {
454  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
455  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
456    
457  if (callout_extra)  if (callout_extra)
458    {    {
# Line 300  pre_start = pchars((unsigned char *)cb-> Line 483  pre_start = pchars((unsigned char *)cb->
483  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
484    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
485    
486    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
487    
488  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
489    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
490    
491  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
492    
493  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
494  shown */  shown. For automatic callouts, show the pattern offset. */
495    
496  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
497    else fprintf(outfile, "%3d ", cb->callout_number);    {
498      fprintf(outfile, "%+3d ", cb->pattern_position);
499      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
500      }
501    else
502      {
503      if (callout_extra) fprintf(outfile, "    ");
504        else fprintf(outfile, "%3d ", cb->callout_number);
505      }
506    
507  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
508  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 320  if (post_start > 0) Line 513  if (post_start > 0)
513    fprintf(outfile, "^");    fprintf(outfile, "^");
514    }    }
515    
516    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
517      fprintf(outfile, " ");
518    
519    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
520      pbuffer + cb->pattern_position);
521    
522  fprintf(outfile, "\n");  fprintf(outfile, "\n");
523  first_callout = 0;  first_callout = 0;
524    
# Line 350  static void *new_malloc(size_t size) Line 549  static void *new_malloc(size_t size)
549  void *block = malloc(size);  void *block = malloc(size);
550  gotten_store = size;  gotten_store = size;
551  if (show_malloc)  if (show_malloc)
552    fprintf(outfile, "malloc       %3d %p\n", size, block);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
553  return block;  return block;
554  }  }
555    
# Line 368  static void *stack_malloc(size_t size) Line 567  static void *stack_malloc(size_t size)
567  {  {
568  void *block = malloc(size);  void *block = malloc(size);
569  if (show_malloc)  if (show_malloc)
570    fprintf(outfile, "stack_malloc %3d %p\n", size, block);    fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
571  return block;  return block;
572  }  }
573    
# Line 396  if ((rc = pcre_fullinfo(re, study, optio Line 595  if ((rc = pcre_fullinfo(re, study, optio
595    
596    
597  /*************************************************  /*************************************************
598    *         Byte flipping function                 *
599    *************************************************/
600    
/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value      the value to be flipped
  n          2 to swap the two low-order bytes; any other value causes the
             four low-order bytes to be reversed

Returns:     the flipped value; bits above the flipped bytes are zero
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;

if (n == 2)
  {
  return (b0 << 8) | b1;
  }
else
  {
  unsigned long int b2 = (value >> 16) & 0xff;
  unsigned long int b3 = (value >> 24) & 0xff;
  return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
  }
}
610    
611    
612    
613    
614    /*************************************************
615    *        Check match or recursion limit          *
616    *************************************************/
617    
618    static int
619    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
620      int start_offset, int options, int *use_offsets, int use_size_offsets,
621      int flag, unsigned long int *limit, int errnumber, const char *msg)
622    {
623    int count;
624    int min = 0;
625    int mid = 64;
626    int max = -1;
627    
628    extra->flags |= flag;
629    
630    for (;;)
631      {
632      *limit = mid;
633    
634      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
635        use_offsets, use_size_offsets);
636    
637      if (count == errnumber)
638        {
639        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
640        min = mid;
641        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
642        }
643    
644      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
645                             count == PCRE_ERROR_PARTIAL)
646        {
647        if (mid == min + 1)
648          {
649          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
650          break;
651          }
652        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
653        max = mid;
654        mid = (min + mid)/2;
655        }
656      else break;    /* Some other error */
657      }
658    
659    extra->flags &= ~flag;
660    return count;
661    }
662    
663    
664    
665    /*************************************************
666    *         Check newline indicator                *
667    *************************************************/
668    
669    /* This is used both at compile and run-time to check for <xxx> escapes, where
670    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
671    no match.
672    
673    Arguments:
674      p           points after the leading '<'
675      f           file for error message
676    
677    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
678    */
679    
680    static int
681    check_newline(uschar *p, FILE *f)
682    {
683    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
684    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
685    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
686    if (strncmp((char *)p, "anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
687    if (strncmp((char *)p, "any>", 4) == 0) return PCRE_NEWLINE_ANY;
688    fprintf(f, "Unknown newline type at: <%s\n", p);
689    return 0;
690    }
691    
692    
693    
694    /*************************************************
695    *             Usage function                     *
696    *************************************************/
697    
/* Write the command-line summary to the standard output. The -dfa and -p
lines are included only when the corresponding support has not been compiled
out with NODFA or NOPOSIX. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input> [<output>]]\n", stdout);
fputs("  -b       show compiled code (bytecode)\n", stdout);
fputs("  -C       show PCRE compile-time options and exit\n", stdout);
fputs("  -d       debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif
fputs("  -help    show usage information\n", stdout);
fputs("  -i       show information about compiled patterns\n", stdout);
fputs("  -m       output memory used information\n", stdout);
fputs("  -o <n>   set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif
fputs("  -q       quiet: do not output PCRE version number at start\n", stdout);
fputs("  -S <n>   set stack size to <n> megabytes\n", stdout);
fputs("  -s       output store (memory) used information\n", stdout);
fputs("  -t       time compilation and execution\n", stdout);
fputs("  -t <n>   time compilation and execution, repeating <n> times\n", stdout);
fputs("  -tm      time execution (matching) only\n", stdout);
fputs("  -tm <n>  time execution (matching) only, repeating <n> times\n", stdout);
}
723    
724    
725    
726    /*************************************************
727  *                Main Program                    *  *                Main Program                    *
728  *************************************************/  *************************************************/
729    
# Line 410  int options = 0; Line 738  int options = 0;
738  int study_options = 0;  int study_options = 0;
739  int op = 1;  int op = 1;
740  int timeit = 0;  int timeit = 0;
741    int timeitm = 0;
742  int showinfo = 0;  int showinfo = 0;
743  int showstore = 0;  int showstore = 0;
744    int quiet = 0;
745  int size_offsets = 45;  int size_offsets = 45;
746  int size_offsets_max;  int size_offsets_max;
747  int *offsets;  int *offsets = NULL;
748  #if !defined NOPOSIX  #if !defined NOPOSIX
749  int posix = 0;  int posix = 0;
750  #endif  #endif
751  int debug = 0;  int debug = 0;
752  int done = 0;  int done = 0;
753    int all_use_dfa = 0;
754    int yield = 0;
755    int stack_size;
756    
757  unsigned char *buffer;  /* These vectors store, end-to-end, a list of captured substring names. Assume
758  unsigned char *dbuffer;  that 1024 is plenty long enough for the few names we'll be testing. */
759    
760    uschar copynames[1024];
761    uschar getnames[1024];
762    
763    uschar *copynamesptr;
764    uschar *getnamesptr;
765    
766  /* Get buffers from malloc() so that Electric Fence will check their misuse  /* Get buffers from malloc() so that Electric Fence will check their misuse
767  when I am debugging. */  when I am debugging. They grow automatically when very long lines are read. */
768    
769  buffer = (unsigned char *)malloc(BUFFER_SIZE);  buffer = (unsigned char *)malloc(buffer_size);
770  dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);  dbuffer = (unsigned char *)malloc(buffer_size);
771    pbuffer = (unsigned char *)malloc(buffer_size);
772    
773  /* Static so that new_malloc can use it. */  /* The outfile variable is static so that new_malloc can use it. */
774    
775  outfile = stdout;  outfile = stdout;
776    
777    /* The following  _setmode() stuff is some Windows magic that tells its runtime
778    library to translate CRLF into a single LF character. At least, that's what
779    I've been told: never having used Windows I take this all on trust. Originally
780    it set 0x8000, but then I was advised that _O_BINARY was better. */
781    
782    #if defined(_WIN32) || defined(WIN32)
783    _setmode( _fileno( stdout ), _O_BINARY );
784    #endif
785    
786  /* Scan options */  /* Scan options */
787    
788  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 442  while (argc > 1 && argv[op][0] == '-') Line 791  while (argc > 1 && argv[op][0] == '-')
791    
792    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
793      showstore = 1;      showstore = 1;
794    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
795      else if (strcmp(argv[op], "-b") == 0) debug = 1;
796    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
797    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
798    #if !defined NODFA
799      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
800    #endif
801    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
802        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
803          *endptr == 0))          *endptr == 0))
# Line 452  while (argc > 1 && argv[op][0] == '-') Line 805  while (argc > 1 && argv[op][0] == '-')
805      op++;      op++;
806      argc--;      argc--;
807      }      }
808      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
809        {
810        int both = argv[op][2] == 0;
811        int temp;
812        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
813                         *endptr == 0))
814          {
815          timeitm = temp;
816          op++;
817          argc--;
818          }
819        else timeitm = LOOPREPEAT;
820        if (both) timeit = timeitm;
821        }
822      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
823          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
824            *endptr == 0))
825        {
826    #if defined(_WIN32) || defined(WIN32)
827        printf("PCRE: -S not supported on this OS\n");
828        exit(1);
829    #else
830        int rc;
831        struct rlimit rlim;
832        getrlimit(RLIMIT_STACK, &rlim);
833        rlim.rlim_cur = stack_size * 1024 * 1024;
834        rc = setrlimit(RLIMIT_STACK, &rlim);
835        if (rc != 0)
836          {
837        printf("PCRE: setrlimit() failed with error %d\n", rc);
838        exit(1);
839          }
840        op++;
841        argc--;
842    #endif
843        }
844  #if !defined NOPOSIX  #if !defined NOPOSIX
845    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
846  #endif  #endif
# Line 462  while (argc > 1 && argv[op][0] == '-') Line 851  while (argc > 1 && argv[op][0] == '-')
851      printf("Compiled with\n");      printf("Compiled with\n");
852      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
853      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
854        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
855        printf("  %sUnicode properties support\n", rc? "" : "No ");
856      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
857      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
858          (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
859          (rc == -2)? "ANYCRLF" :
860          (rc == -1)? "ANY" : "???");
861      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
862      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
863      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
864      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
865      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
866      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %d\n", rc);
867        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
868        printf("  Default recursion depth limit = %d\n", rc);
869      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);      (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
870      printf("  Match recursion uses %s\n", rc? "stack" : "heap");      printf("  Match recursion uses %s\n", rc? "stack" : "heap");
871      exit(0);      goto EXIT;
872        }
873      else if (strcmp(argv[op], "-help") == 0 ||
874               strcmp(argv[op], "--help") == 0)
875        {
876        usage();
877        goto EXIT;
878      }      }
879    else    else
880      {      {
881      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
882      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
883      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
884      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
885      }      }
886    op++;    op++;
887    argc--;    argc--;
# Line 500  offsets = (int *)malloc(size_offsets_max Line 894  offsets = (int *)malloc(size_offsets_max
894  if (offsets == NULL)  if (offsets == NULL)
895    {    {
896    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
897      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
898    return 1;    yield = 1;
899      goto EXIT;
900    }    }
901    
902  /* Sort out the input and output files */  /* Sort out the input and output files */
903    
904  if (argc > 1)  if (argc > 1)
905    {    {
906    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
907    if (infile == NULL)    if (infile == NULL)
908      {      {
909      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
910      return 1;      yield = 1;
911        goto EXIT;
912      }      }
913    }    }
914    
915  if (argc > 2)  if (argc > 2)
916    {    {
917    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
918    if (outfile == NULL)    if (outfile == NULL)
919      {      {
920      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
921      return 1;      yield = 1;
922        goto EXIT;
923      }      }
924    }    }
925    
# Line 533  pcre_free = new_free; Line 930  pcre_free = new_free;
930  pcre_stack_malloc = stack_malloc;  pcre_stack_malloc = stack_malloc;
931  pcre_stack_free = stack_free;  pcre_stack_free = stack_free;
932    
933  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
934    
935  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
936    
937  /* Main loop */  /* Main loop */
938    
# Line 551  while (!done) Line 948  while (!done)
948    
949    const char *error;    const char *error;
950    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
951      unsigned char *to_file = NULL;
952    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
953      unsigned long int true_size, true_study_size = 0;
954      size_t size, regex_gotten_store;
955    int do_study = 0;    int do_study = 0;
956    int do_debug = debug;    int do_debug = debug;
957      int debug_lengths = 1;
958    int do_G = 0;    int do_G = 0;
959    int do_g = 0;    int do_g = 0;
960    int do_showinfo = showinfo;    int do_showinfo = showinfo;
961    int do_showrest = 0;    int do_showrest = 0;
962    int erroroffset, len, delimiter;    int do_flip = 0;
963      int erroroffset, len, delimiter, poffset;
964    
965    use_utf8 = 0;    use_utf8 = 0;
966    
967    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
968    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
969    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
970    fflush(outfile);    fflush(outfile);
971    
# Line 571  while (!done) Line 973  while (!done)
973    while (isspace(*p)) p++;    while (isspace(*p)) p++;
974    if (*p == 0) continue;    if (*p == 0) continue;
975    
976    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
977    complete, read more. */  
978      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
979        {
980        unsigned long int magic, get_options;
981        uschar sbuf[8];
982        FILE *f;
983    
984        p++;
985        pp = p + (int)strlen((char *)p);
986        while (isspace(pp[-1])) pp--;
987        *pp = 0;
988    
989        f = fopen((char *)p, "rb");
990        if (f == NULL)
991          {
992          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
993          continue;
994          }
995    
996        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
997    
998        true_size =
999          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1000        true_study_size =
1001          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1002    
1003        re = (real_pcre *)new_malloc(true_size);
1004        regex_gotten_store = gotten_store;
1005    
1006        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1007    
1008        magic = ((real_pcre *)re)->magic_number;
1009        if (magic != MAGIC_NUMBER)
1010          {
1011          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1012            {
1013            do_flip = 1;
1014            }
1015          else
1016            {
1017            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1018            fclose(f);
1019            continue;
1020            }
1021          }
1022    
1023        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1024          do_flip? " (byte-inverted)" : "", p);
1025    
1026        /* Need to know if UTF-8 for printing data strings */
1027    
1028        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1029        use_utf8 = (get_options & PCRE_UTF8) != 0;
1030    
1031        /* Now see if there is any following study data */
1032    
1033        if (true_study_size != 0)
1034          {
1035          pcre_study_data *psd;
1036    
1037          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1038          extra->flags = PCRE_EXTRA_STUDY_DATA;
1039    
1040          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1041          extra->study_data = psd;
1042    
1043          if (fread(psd, 1, true_study_size, f) != true_study_size)
1044            {
1045            FAIL_READ:
1046            fprintf(outfile, "Failed to read data from %s\n", p);
1047            if (extra != NULL) new_free(extra);
1048            if (re != NULL) new_free(re);
1049            fclose(f);
1050            continue;
1051            }
1052          fprintf(outfile, "Study data loaded from %s\n", p);
1053          do_study = 1;     /* To get the data output if requested */
1054          }
1055        else fprintf(outfile, "No study data\n");
1056    
1057        fclose(f);
1058        goto SHOW_INFO;
1059        }
1060    
1061      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1062      the pattern; if it isn't complete, read more. */
1063    
1064    delimiter = *p++;    delimiter = *p++;
1065    
# Line 583  while (!done) Line 1070  while (!done)
1070      }      }
1071    
1072    pp = p;    pp = p;
1073      poffset = p - buffer;
1074    
1075    for(;;)    for(;;)
1076      {      {
# Line 593  while (!done) Line 1081  while (!done)
1081        pp++;        pp++;
1082        }        }
1083      if (*pp != 0) break;      if (*pp != 0) break;
   
     len = BUFFER_SIZE - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
1084      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
1085      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
1086        {        {
1087        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1088        done = 1;        done = 1;
# Line 611  while (!done) Line 1091  while (!done)
1091      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1092      }      }
1093    
1094      /* The buffer may have moved while being extended; reset the start of data
1095      pointer to the correct relative point in the buffer. */
1096    
1097      p = buffer + poffset;
1098    
1099    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1100    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1101    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1102    
1103    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1104    
1105    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1106      for callouts. */
1107    
1108    *pp++ = 0;    *pp++ = 0;
1109      strcpy((char *)pbuffer, (char *)p);
1110    
1111    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1112    
# Line 631  while (!done) Line 1118  while (!done)
1118      {      {
1119      switch (*pp++)      switch (*pp++)
1120        {        {
1121          case 'f': options |= PCRE_FIRSTLINE; break;
1122        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1123        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1124        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 639  while (!done) Line 1127  while (!done)
1127    
1128        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1129        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1130          case 'B': do_debug = 1; break;
1131          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1132        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1133        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1134          case 'F': do_flip = 1; break;
1135        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1136        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1137          case 'J': options |= PCRE_DUPNAMES; break;
1138        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1139        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1140    
# Line 653  while (!done) Line 1145  while (!done)
1145        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1146        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1147        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1148          case 'Z': debug_lengths = 0; break;
1149        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1150        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
1151    
1152        case 'L':        case 'L':
1153        ppp = pp;        ppp = pp;
1154        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1155          /* The '0' test is just in case this is an unterminated line. */
1156          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1157        *ppp = 0;        *ppp = 0;
1158        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1159          {          {
1160          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1161          goto SKIP_DATA;          goto SKIP_DATA;
1162          }          }
1163          locale_set = 1;
1164        tables = pcre_maketables();        tables = pcre_maketables();
1165        pp = ppp;        pp = ppp;
1166        break;        break;
1167    
1168        case '\n': case ' ': break;        case '>':
1169          to_file = pp;
1170          while (*pp != 0) pp++;
1171          while (isspace(pp[-1])) pp--;
1172          *pp = 0;
1173          break;
1174    
1175          case '<':
1176            {
1177            int x = check_newline(pp, outfile);
1178            if (x == 0) goto SKIP_DATA;
1179            options |= x;
1180            while (*pp++ != '>');
1181            }
1182          break;
1183    
1184          case '\r':                      /* So that it works in Windows */
1185          case '\n':
1186          case ' ':
1187          break;
1188    
1189        default:        default:
1190        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1191        goto SKIP_DATA;        goto SKIP_DATA;
# Line 685  while (!done) Line 1201  while (!done)
1201      {      {
1202      int rc;      int rc;
1203      int cflags = 0;      int cflags = 0;
1204    
1205      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1206      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1207        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1208        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1209        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1210    
1211      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1212    
1213      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 694  while (!done) Line 1215  while (!done)
1215    
1216      if (rc != 0)      if (rc != 0)
1217        {        {
1218        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1219        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1220        goto SKIP_DATA;        goto SKIP_DATA;
1221        }        }
# Line 706  while (!done) Line 1227  while (!done)
1227  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1228    
1229      {      {
1230      if (timeit)      if (timeit > 0)
1231        {        {
1232        register int i;        register int i;
1233        clock_t time_taken;        clock_t time_taken;
1234        clock_t start_time = clock();        clock_t start_time = clock();
1235        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1236          {          {
1237          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1238          if (re != NULL) free(re);          if (re != NULL) free(re);
1239          }          }
1240        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1241        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1242          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1243            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1244        }        }
1245    
# Line 735  while (!done) Line 1256  while (!done)
1256          {          {
1257          for (;;)          for (;;)
1258            {            {
1259            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1260              {              {
1261              done = 1;              done = 1;
1262              goto CONTINUE;              goto CONTINUE;
# Line 759  while (!done) Line 1280  while (!done)
1280                sizeof(real_pcre) -                sizeof(real_pcre) -
1281                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1282    
1283        /* Extract the size for possible writing before possibly flipping it,
1284        and remember the store that was got. */
1285    
1286        true_size = ((real_pcre *)re)->size;
1287        regex_gotten_store = gotten_store;
1288    
1289        /* If /S was present, study the regexp to generate additional info to
1290        help with the matching. */
1291    
1292        if (do_study)
1293          {
1294          if (timeit > 0)
1295            {
1296            register int i;
1297            clock_t time_taken;
1298            clock_t start_time = clock();
1299            for (i = 0; i < timeit; i++)
1300              extra = pcre_study(re, study_options, &error);
1301            time_taken = clock() - start_time;
1302            if (extra != NULL) free(extra);
1303            fprintf(outfile, "  Study time %.4f milliseconds\n",
1304              (((double)time_taken * 1000.0) / (double)timeit) /
1305                (double)CLOCKS_PER_SEC);
1306            }
1307          extra = pcre_study(re, study_options, &error);
1308          if (error != NULL)
1309            fprintf(outfile, "Failed to study: %s\n", error);
1310          else if (extra != NULL)
1311            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1312          }
1313    
1314        /* If the 'F' option was present, we flip the bytes of all the integer
1315        fields in the regex data block and the study block. This is to make it
1316        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1317        compiled on a different architecture. */
1318    
1319        if (do_flip)
1320          {
1321          real_pcre *rre = (real_pcre *)re;
1322          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1323          rre->size = byteflip(rre->size, sizeof(rre->size));
1324          rre->options = byteflip(rre->options, sizeof(rre->options));
1325          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1326          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1327          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1328          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1329          rre->name_table_offset = byteflip(rre->name_table_offset,
1330            sizeof(rre->name_table_offset));
1331          rre->name_entry_size = byteflip(rre->name_entry_size,
1332            sizeof(rre->name_entry_size));
1333          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1334    
1335          if (extra != NULL)
1336            {
1337            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1338            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1339            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1340            }
1341          }
1342    
1343        /* Extract information from the compiled data if required */
1344    
1345        SHOW_INFO:
1346    
1347        if (do_debug)
1348          {
1349          fprintf(outfile, "------------------------------------------------------------------\n");
1350          pcre_printint(re, outfile, debug_lengths);
1351          }
1352    
1353      if (do_showinfo)      if (do_showinfo)
1354        {        {
1355        unsigned long int get_options;        unsigned long int get_options, all_options;
1356    #if !defined NOINFOCHECK
1357        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1358        int count, backrefmax, first_char, need_char;  #endif
1359          int count, backrefmax, first_char, need_char, okpartial, jchanged;
1360        int nameentrysize, namecount;        int nameentrysize, namecount;
1361        const uschar *nametable;        const uschar *nametable;
       size_t size;  
   
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
1362    
1363        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1364        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
# Line 783  while (!done) Line 1369  while (!done)
1369        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1370        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1371        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1372          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1373          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1374    
1375    #if !defined NOINFOCHECK
1376        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1377        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1378          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 801  while (!done) Line 1390  while (!done)
1390            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1391              get_options, old_options);              get_options, old_options);
1392          }          }
1393    #endif
1394    
1395        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1396          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1397          size, gotten_store);          (int)size, (int)regex_gotten_store);
1398    
1399        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1400        if (backrefmax > 0)        if (backrefmax > 0)
# Line 822  while (!done) Line 1412  while (!done)
1412            }            }
1413          }          }
1414    
1415          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1416    
1417          all_options = ((real_pcre *)re)->options;
1418          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1419    
1420        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1421          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1422            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1423            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1424            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1425            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1426              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1427            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1428            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1429            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1430            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1431              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1432            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1433            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1434              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1435    
1436        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1437          fprintf(outfile, "Case state changes\n");  
1438          switch (get_options & PCRE_NEWLINE_BITS)
1439            {
1440            case PCRE_NEWLINE_CR:
1441            fprintf(outfile, "Forced newline sequence: CR\n");
1442            break;
1443    
1444            case PCRE_NEWLINE_LF:
1445            fprintf(outfile, "Forced newline sequence: LF\n");
1446            break;
1447    
1448            case PCRE_NEWLINE_CRLF:
1449            fprintf(outfile, "Forced newline sequence: CRLF\n");
1450            break;
1451    
1452            case PCRE_NEWLINE_ANYCRLF:
1453            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1454            break;
1455    
1456            case PCRE_NEWLINE_ANY:
1457            fprintf(outfile, "Forced newline sequence: ANY\n");
1458            break;
1459    
1460            default:
1461            break;
1462            }
1463    
1464        if (first_char == -1)        if (first_char == -1)
1465          {          {
1466          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1467          }          }
1468        else if (first_char < 0)        else if (first_char < 0)
1469          {          {
# Line 851  while (!done) Line 1474  while (!done)
1474          int ch = first_char & 255;          int ch = first_char & 255;
1475          const char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1476            "" : " (caseless)";            "" : " (caseless)";
1477          if (isprint(ch))          if (PRINTHEX(ch))
1478            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1479          else          else
1480            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 866  while (!done) Line 1489  while (!done)
1489          int ch = need_char & 255;          int ch = need_char & 255;
1490          const char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1491            "" : " (caseless)";            "" : " (caseless)";
1492          if (isprint(ch))          if (PRINTHEX(ch))
1493            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1494          else          else
1495            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1496          }          }
       }  
   
     /* If /S was present, study the regexp to generate additional info to  
     help with the matching. */  
   
     if (do_study)  
       {  
       if (timeit)  
         {  
         register int i;  
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < LOOPREPEAT; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.3f milliseconds\n",  
           (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /  
             (double)CLOCKS_PER_SEC);  
         }  
   
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1497    
1498        /* Don't output study size; at present it is in any case a fixed        /* Don't output study size; at present it is in any case a fixed
1499        value, but it varies, depending on the computer architecture, and        value, but it varies, depending on the computer architecture, and
1500        so messes up the test suite. */        so messes up the test suite. (And with the /F option, it might be
1501          flipped.) */
1502    
1503        else if (do_showinfo)        if (do_study)
1504          {          {
1505          size_t size;          if (extra == NULL)
1506          uschar *start_bits = NULL;            fprintf(outfile, "Study returned NULL\n");
         new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);  
         new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);  
         /* fprintf(outfile, "Study size = %d\n", size); */  
         if (start_bits == NULL)  
           fprintf(outfile, "No starting character set\n");  
1507          else          else
1508            {            {
1509            int i;            uschar *start_bits = NULL;
1510            int c = 24;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1511            fprintf(outfile, "Starting character set: ");  
1512            for (i = 0; i < 256; i++)            if (start_bits == NULL)
1513                fprintf(outfile, "No starting byte set\n");
1514              else
1515              {              {
1516              if ((start_bits[i/8] & (1<<(i%8))) != 0)              int i;
1517                int c = 24;
1518                fprintf(outfile, "Starting byte set: ");
1519                for (i = 0; i < 256; i++)
1520                {                {
1521                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
                 {  
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
1522                  {                  {
1523                  fprintf(outfile, "\\x%02x ", i);                  if (c > 75)
1524                  c += 5;                    {
1525                      fprintf(outfile, "\n  ");
1526                      c = 2;
1527                      }
1528                    if (PRINTHEX(i) && i != ' ')
1529                      {
1530                      fprintf(outfile, "%c ", i);
1531                      c += 2;
1532                      }
1533                    else
1534                      {
1535                      fprintf(outfile, "\\x%02x ", i);
1536                      c += 5;
1537                      }
1538                  }                  }
1539                }                }
1540                fprintf(outfile, "\n");
1541              }              }
           fprintf(outfile, "\n");  
1542            }            }
1543          }          }
1544        }        }
1545      }  
1546        /* If the '>' option was present, we write out the regex to a file, and
1547        that is all. The first 8 bytes of the file are the regex length and then
1548        the study length, in big-endian order. */
1549    
1550        if (to_file != NULL)
1551          {
1552          FILE *f = fopen((char *)to_file, "wb");
1553          if (f == NULL)
1554            {
1555            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1556            }
1557          else
1558            {
1559            uschar sbuf[8];
1560            sbuf[0] = (true_size >> 24)  & 255;
1561            sbuf[1] = (true_size >> 16)  & 255;
1562            sbuf[2] = (true_size >>  8)  & 255;
1563            sbuf[3] = (true_size)  & 255;
1564    
1565            sbuf[4] = (true_study_size >> 24)  & 255;
1566            sbuf[5] = (true_study_size >> 16)  & 255;
1567            sbuf[6] = (true_study_size >>  8)  & 255;
1568            sbuf[7] = (true_study_size)  & 255;
1569    
1570            if (fwrite(sbuf, 1, 8, f) < 8 ||
1571                fwrite(re, 1, true_size, f) < true_size)
1572              {
1573              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1574              }
1575            else
1576              {
1577              fprintf(outfile, "Compiled regex written to %s\n", to_file);
1578              if (extra != NULL)
1579                {
1580                if (fwrite(extra->study_data, 1, true_study_size, f) <
1581                    true_study_size)
1582                  {
1583                  fprintf(outfile, "Write error on %s: %s\n", to_file,
1584                    strerror(errno));
1585                  }
1586                else fprintf(outfile, "Study data written to %s\n", to_file);
1587    
1588                }
1589              }
1590            fclose(f);
1591            }
1592    
1593          new_free(re);
1594          if (extra != NULL) new_free(extra);
1595          if (tables != NULL) new_free((void *)tables);
1596          continue;  /* With next regex */
1597          }
1598        }        /* End of non-POSIX compile */
1599    
1600    /* Read data lines and test them */    /* Read data lines and test them */
1601    
1602    for (;;)    for (;;)
1603      {      {
1604      unsigned char *q;      uschar *q;
1605      unsigned char *bptr = dbuffer;      uschar *bptr;
1606      int *use_offsets = offsets;      int *use_offsets = offsets;
1607      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1608      int callout_data = 0;      int callout_data = 0;
# Line 961  while (!done) Line 1615  while (!done)
1615      int gmatched = 0;      int gmatched = 0;
1616      int start_offset = 0;      int start_offset = 0;
1617      int g_notempty = 0;      int g_notempty = 0;
1618        int use_dfa = 0;
1619    
1620      options = 0;      options = 0;
1621    
1622        *copynames = 0;
1623        *getnames = 0;
1624    
1625        copynamesptr = copynames;
1626        getnamesptr = getnames;
1627    
1628      pcre_callout = callout;      pcre_callout = callout;
1629      first_callout = 1;      first_callout = 1;
1630      callout_extra = 0;      callout_extra = 0;
# Line 972  while (!done) Line 1633  while (!done)
1633      callout_fail_id = -1;      callout_fail_id = -1;
1634      show_malloc = 0;      show_malloc = 0;
1635    
1636      if (infile == stdin) printf("data> ");      if (extra != NULL) extra->flags &=
1637      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)        ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1638    
1639        len = 0;
1640        for (;;)
1641        {        {
1642        done = 1;        if (infile == stdin) printf("data> ");
1643        goto CONTINUE;        if (extend_inputline(infile, buffer + len) == NULL)
1644            {
1645            if (len > 0) break;
1646            done = 1;
1647            goto CONTINUE;
1648            }
1649          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1650          len = (int)strlen((char *)buffer);
1651          if (buffer[len-1] == '\n') break;
1652        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1653    
     len = (int)strlen((char *)buffer);  
1654      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1655      buffer[len] = 0;      buffer[len] = 0;
1656      if (len == 0) break;      if (len == 0) break;
# Line 988  while (!done) Line 1658  while (!done)
1658      p = buffer;      p = buffer;
1659      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1660    
1661      q = dbuffer;      bptr = q = dbuffer;
1662      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1663        {        {
1664        int i = 0;        int i = 0;
# Line 1010  while (!done) Line 1680  while (!done)
1680          c -= '0';          c -= '0';
1681          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1682            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1683    
1684    #if !defined NOUTF8
1685            if (use_utf8 && c > 255)
1686              {
1687              unsigned char buff8[8];
1688              int ii, utn;
1689              utn = ord2utf8(c, buff8);
1690              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1691              c = buff8[ii];   /* Last byte */
1692              }
1693    #endif
1694          break;          break;
1695    
1696          case 'x':          case 'x':
1697    
1698          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1699    
1700    #if !defined NOUTF8
1701          if (*p == '{')          if (*p == '{')
1702            {            {
1703            unsigned char *pt = p;            unsigned char *pt = p;
# Line 1034  while (!done) Line 1716  while (!done)
1716              }              }
1717            /* Not correct form; fall through */            /* Not correct form; fall through */
1718            }            }
1719    #endif
1720    
1721          /* Ordinary \x */          /* Ordinary \x */
1722    
# Line 1045  while (!done) Line 1728  while (!done)
1728            }            }
1729          break;          break;
1730    
1731          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1732          p--;          p--;
1733          continue;          continue;
1734    
1735            case '>':
1736            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1737            continue;
1738    
1739          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1740          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1741          continue;          continue;
# Line 1065  while (!done) Line 1752  while (!done)
1752            }            }
1753          else if (isalnum(*p))          else if (isalnum(*p))
1754            {            {
1755            uschar name[256];            uschar *npp = copynamesptr;
           uschar *npp = name;  
1756            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1757              *npp++ = 0;
1758            *npp = 0;            *npp = 0;
1759            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1760            if (n < 0)            if (n < 0)
1761              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1762            else copystrings |= 1 << n;            copynamesptr = npp;
1763            }            }
1764          else if (*p == '+')          else if (*p == '+')
1765            {            {
# Line 1110  while (!done) Line 1797  while (!done)
1797            }            }
1798          continue;          continue;
1799    
1800    #if !defined NODFA
1801            case 'D':
1802    #if !defined NOPOSIX
1803            if (posix || do_posix)
1804              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1805            else
1806    #endif
1807              use_dfa = 1;
1808            continue;
1809    
1810            case 'F':
1811            options |= PCRE_DFA_SHORTEST;
1812            continue;
1813    #endif
1814    
1815          case 'G':          case 'G':
1816          if (isdigit(*p))          if (isdigit(*p))
1817            {            {
# Line 1118  while (!done) Line 1820  while (!done)
1820            }            }
1821          else if (isalnum(*p))          else if (isalnum(*p))
1822            {            {
1823            uschar name[256];            uschar *npp = getnamesptr;
           uschar *npp = name;  
1824            while (isalnum(*p)) *npp++ = *p++;            while (isalnum(*p)) *npp++ = *p++;
1825              *npp++ = 0;
1826            *npp = 0;            *npp = 0;
1827            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1828            if (n < 0)            if (n < 0)
1829              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1830            else getstrings |= 1 << n;            getnamesptr = npp;
1831            }            }
1832          continue;          continue;
1833    
# Line 1151  while (!done) Line 1853  while (!done)
1853            if (offsets == NULL)            if (offsets == NULL)
1854              {              {
1855              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1856                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1857              return 1;              yield = 1;
1858                goto EXIT;
1859              }              }
1860            }            }
1861          use_size_offsets = n;          use_size_offsets = n;
1862          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1863          continue;          continue;
1864    
1865            case 'P':
1866            options |= PCRE_PARTIAL;
1867            continue;
1868    
1869            case 'Q':
1870            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1871            if (extra == NULL)
1872              {
1873              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1874              extra->flags = 0;
1875              }
1876            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1877            extra->match_limit_recursion = n;
1878            continue;
1879    
1880            case 'q':
1881            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1882            if (extra == NULL)
1883              {
1884              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1885              extra->flags = 0;
1886              }
1887            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1888            extra->match_limit = n;
1889            continue;
1890    
1891    #if !defined NODFA
1892            case 'R':
1893            options |= PCRE_DFA_RESTART;
1894            continue;
1895    #endif
1896    
1897          case 'S':          case 'S':
1898          show_malloc = 1;          show_malloc = 1;
1899          continue;          continue;
# Line 1170  while (!done) Line 1905  while (!done)
1905          case '?':          case '?':
1906          options |= PCRE_NO_UTF8_CHECK;          options |= PCRE_NO_UTF8_CHECK;
1907          continue;          continue;
1908    
1909            case '<':
1910              {
1911              int x = check_newline(p, outfile);
1912              if (x == 0) goto NEXT_DATA;
1913              options |= x;
1914              while (*p++ != '>');
1915              }
1916            continue;
1917          }          }
1918        *q++ = c;        *q++ = c;
1919        }        }
1920      *q = 0;      *q = 0;
1921      len = q - dbuffer;      len = q - dbuffer;
1922    
1923        if ((all_use_dfa || use_dfa) && find_match_limit)
1924          {
1925          printf("**Match limit not relevant for DFA matching: ignored\n");
1926          find_match_limit = 0;
1927          }
1928    
1929      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1930      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
1931    
# Line 1194  while (!done) Line 1944  while (!done)
1944    
1945        if (rc != 0)        if (rc != 0)
1946          {          {
1947          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1948          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1949          }          }
1950          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1951                  != 0)
1952            {
1953            fprintf(outfile, "Matched with REG_NOSUB\n");
1954            }
1955        else        else
1956          {          {
1957          size_t i;          size_t i;
# Line 1228  while (!done) Line 1983  while (!done)
1983    
1984      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
1985        {        {
1986        if (timeit)        if (timeitm > 0)
1987          {          {
1988          register int i;          register int i;
1989          clock_t time_taken;          clock_t time_taken;
1990          clock_t start_time = clock();          clock_t start_time = clock();
1991          for (i = 0; i < LOOPREPEAT; i++)  
1992    #if !defined NODFA
1993            if (all_use_dfa || use_dfa)
1994              {
1995              int workspace[1000];
1996              for (i = 0; i < timeitm; i++)
1997                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1998                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1999                  sizeof(workspace)/sizeof(int));
2000              }
2001            else
2002    #endif
2003    
2004            for (i = 0; i < timeitm; i++)
2005            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2006              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2007    
2008          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2009          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2010            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2011              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2012          }          }
2013    
2014        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2015        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2016          for the recursion limit. */
2017    
2018        if (find_match_limit)        if (find_match_limit)
2019          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2020          if (extra == NULL)          if (extra == NULL)
2021            {            {
2022            extra = (pcre_extra *)malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2023            extra->flags = 0;            extra->flags = 0;
2024            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
2025    
2026          for (;;)          (void)check_match_limit(re, extra, bptr, len, start_offset,
2027            {            options|g_notempty, use_offsets, use_size_offsets,
2028            extra->match_limit = mid;            PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2029            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,            PCRE_ERROR_MATCHLIMIT, "match()");
2030              options | g_notempty, use_offsets, use_size_offsets);  
2031            if (count == PCRE_ERROR_MATCHLIMIT)          count = check_match_limit(re, extra, bptr, len, start_offset,
2032              {            options|g_notempty, use_offsets, use_size_offsets,
2033              /* fprintf(outfile, "Testing match limit = %d\n", mid); */            PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2034              min = mid;            PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
   
         extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;  
2035          }          }
2036    
2037        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1305  while (!done) Line 2053  while (!done)
2053        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2054        value of match_limit. */        value of match_limit. */
2055    
2056        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
2057          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
2058            {
2059            int workspace[1000];
2060            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2061              options | g_notempty, use_offsets, use_size_offsets, workspace,
2062              sizeof(workspace)/sizeof(int));
2063            if (count == 0)
2064              {
2065              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2066              count = use_size_offsets/2;
2067              }
2068            }
2069    #endif
2070    
2071        if (count == 0)        else
2072          {          {
2073          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2074          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2075            if (count == 0)
2076              {
2077              fprintf(outfile, "Matched, but too many substrings\n");
2078              count = use_size_offsets/3;
2079              }
2080          }          }
2081    
2082        /* Matched */        /* Matched */
2083    
2084        if (count >= 0)        if (count >= 0)
2085          {          {
2086          int i;          int i, maxcount;
2087    
2088    #if !defined NODFA
2089            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2090    #endif
2091              maxcount = use_size_offsets/3;
2092    
2093            /* This is a check against a lunatic return value. */
2094    
2095            if (count > maxcount)
2096              {
2097              fprintf(outfile,
2098                "** PCRE error: returned count %d is too big for offset size %d\n",
2099                count, use_size_offsets);
2100              count = use_size_offsets/3;
2101              if (do_g || do_G)
2102                {
2103                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2104                do_g = do_G = FALSE;        /* Break g/G loop */
2105                }
2106              }
2107    
2108          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2109            {            {
2110            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1346  while (!done) Line 2132  while (!done)
2132            {            {
2133            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2134              {              {
2135              char copybuffer[16];              char copybuffer[256];
2136              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2137                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2138              if (rc < 0)              if (rc < 0)
# Line 1356  while (!done) Line 2142  while (!done)
2142              }              }
2143            }            }
2144    
2145            for (copynamesptr = copynames;
2146                 *copynamesptr != 0;
2147                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2148              {
2149              char copybuffer[256];
2150              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2151                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2152              if (rc < 0)
2153                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2154              else
2155                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2156              }
2157    
2158          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2159            {            {
2160            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1368  while (!done) Line 2167  while (!done)
2167              else              else
2168                {                {
2169                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2170                pcre_free_substring(substring);                pcre_free_substring(substring);
2171                }                }
2172              }              }
2173            }            }
2174    
2175            for (getnamesptr = getnames;
2176                 *getnamesptr != 0;
2177                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2178              {
2179              const char *substring;
2180              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2181                count, (char *)getnamesptr, &substring);
2182              if (rc < 0)
2183                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2184              else
2185                {
2186                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2187                pcre_free_substring(substring);
2188                }
2189              }
2190    
2191          if (getlist)          if (getlist)
2192            {            {
2193            const char **stringlist;            const char **stringlist;
# Line 1393  while (!done) Line 2207  while (!done)
2207            }            }
2208          }          }
2209    
2210          /* There was a partial match */
2211    
2212          else if (count == PCRE_ERROR_PARTIAL)
2213            {
2214            fprintf(outfile, "Partial match");
2215    #if !defined NODFA
2216            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2217              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2218                bptr + use_offsets[0]);
2219    #endif
2220            fprintf(outfile, "\n");
2221            break;  /* Out of the /g loop */
2222            }
2223    
2224        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2225        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2226        We want to advance the start offset, and continue. In the case of UTF-8        to advance the start offset, and continue. We won't be at the end of the
2227        matching, the advance must be one character, not one byte. Fudge the        string - that was checked before setting g_notempty.
2228        offset values to achieve this. We won't be at the end of the string -  
2229        that was checked before setting g_notempty. */        Complication arises in the case when the newline option is "any" or
2230          "anycrlf". If the previous match was at the end of a line terminated by
2231          CRLF, an advance of one character just passes the \r, whereas we should
2232          prefer the longer newline sequence, as does the code in pcre_exec().
2233          Fudge the offset value to achieve this.
2234    
2235          Otherwise, in the case of UTF-8 matching, the advance must be one
2236          character, not one byte. */
2237    
2238        else        else
2239          {          {
2240          if (g_notempty != 0)          if (g_notempty != 0)
2241            {            {
2242            int onechar = 1;            int onechar = 1;
2243              unsigned int obits = ((real_pcre *)re)->options;
2244            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2245            if (use_utf8)            if ((obits & PCRE_NEWLINE_BITS) == 0)
2246                {
2247                int d;
2248                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2249                obits = (d == '\r')? PCRE_NEWLINE_CR :
2250                        (d == '\n')? PCRE_NEWLINE_LF :
2251                        (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2252                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2253                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2254                }
2255              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2256                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2257                  &&
2258                  start_offset < len - 1 &&
2259                  bptr[start_offset] == '\r' &&
2260                  bptr[start_offset+1] == '\n')
2261                onechar++;
2262              else if (use_utf8)
2263              {              {
2264              while (start_offset + onechar < len)              while (start_offset + onechar < len)
2265                {                {
# Line 1441  while (!done) Line 2294  while (!done)
2294        character. */        character. */
2295    
2296        g_notempty = 0;        g_notempty = 0;
2297    
2298        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2299          {          {
2300          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1459  while (!done) Line 2313  while (!done)
2313          len -= use_offsets[1];          len -= use_offsets[1];
2314          }          }
2315        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2316    
2317        NEXT_DATA: continue;
2318      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2319    
2320    CONTINUE:    CONTINUE:
# Line 1467  while (!done) Line 2323  while (!done)
2323    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2324  #endif  #endif
2325    
2326    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2327    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2328    if (tables != NULL)    if (tables != NULL)
2329      {      {
2330      free((void *)tables);      new_free((void *)tables);
2331      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2332        locale_set = 0;
2333      }      }
2334    }    }
2335    
2336  if (infile == stdin) fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2337  return 0;  
2338    EXIT:
2339    
2340    if (infile != NULL && infile != stdin) fclose(infile);
2341    if (outfile != NULL && outfile != stdout) fclose(outfile);
2342    
2343    free(buffer);
2344    free(dbuffer);
2345    free(pbuffer);
2346    free(offsets);
2347    
2348    return yield;
2349  }  }
2350    
2351  /* End */  /* End of pcretest.c */

Legend:
Removed from v.73  
changed lines
  Added in v.200

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12