/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 65 by nigel, Sat Feb 24 21:40:08 2007 UTC revision 392 by ph10, Tue Mar 17 21:30:30 2009 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather untidy in places. */  been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39    #ifdef HAVE_CONFIG_H
40    #include "config.h"
41    #endif
42    
43  #include <ctype.h>  #include <ctype.h>
44  #include <stdio.h>  #include <stdio.h>
# Line 12  been extended and consequently is now ra Line 46  been extended and consequently is now ra
46  #include <stdlib.h>  #include <stdlib.h>
47  #include <time.h>  #include <time.h>
48  #include <locale.h>  #include <locale.h>
49    #include <errno.h>
50    
51    #ifdef SUPPORT_LIBREADLINE
52    #ifdef HAVE_UNISTD_H
53    #include <unistd.h>
54    #endif
55    #include <readline/readline.h>
56    #include <readline/history.h>
57    #endif
58    
59    
60  /* We need the internal info for displaying the results of pcre_study(). Also  /* A number of things vary for Windows builds. Originally, pcretest opened its
61  for getting the opcodes for showing compiled code. */  input and output without "b"; then I was told that "b" was needed in some
62    environments, so it was added for release 5.0 to both the input and output. (It
63    makes no difference on Unix-like systems.) Later I was told that it is wrong
64    for the input on Windows. I've now abstracted the modes into two macros that
65    are set here, to make it easier to fiddle with them, and removed "b" from the
66    input mode under Windows. */
67    
68    #if defined(_WIN32) || defined(WIN32)
69    #include <io.h>                /* For _setmode() */
70    #include <fcntl.h>             /* For _O_BINARY */
71    #define INPUT_MODE   "r"
72    #define OUTPUT_MODE  "wb"
73    
74    #define isatty _isatty         /* This is what Windows calls them, I'm told */
75    #define fileno _fileno
76    
77    #else
78    #include <sys/time.h>          /* These two includes are needed */
79    #include <sys/resource.h>      /* for setrlimit(). */
80    #define INPUT_MODE   "rb"
81    #define OUTPUT_MODE  "wb"
82    #endif
83    
84    
85    /* We have to include pcre_internal.h because we need the internal info for
86    displaying the results of pcre_study() and we also need to know about the
87    internal macros, structures, and other internal data values; pcretest has
88    "inside information" compared to a program that strictly follows the PCRE API.
89    
90    Although pcre_internal.h does itself include pcre.h, we explicitly include it
91    here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
92    appropriately for an application, not for building PCRE. */
93    
94    #include "pcre.h"
95    #include "pcre_internal.h"
96    
97    /* We need access to some of the data tables that PCRE uses. So as not to have
98    to keep two copies, we include the source file here, changing the names of the
99    external symbols to prevent clashes. */
100    
101    #define _pcre_ucp_gentype      ucp_gentype
102    #define _pcre_utf8_table1      utf8_table1
103    #define _pcre_utf8_table1_size utf8_table1_size
104    #define _pcre_utf8_table2      utf8_table2
105    #define _pcre_utf8_table3      utf8_table3
106    #define _pcre_utf8_table4      utf8_table4
107    #define _pcre_utt              utt
108    #define _pcre_utt_size         utt_size
109    #define _pcre_utt_names        utt_names
110    #define _pcre_OP_lengths       OP_lengths
111    
112    #include "pcre_tables.c"
113    
114    /* We also need the pcre_printint() function for printing out compiled
115    patterns. This function is in a separate file so that it can be included in
116    pcre_compile.c when that module is compiled with debugging enabled.
117    
118    The definition of the macro PRINTABLE, which determines whether to print an
119    output character as-is or as a hex value when showing compiled patterns, is
120    contained in this file. We use it here also, in cases when the locale has not
121    been explicitly changed, so as to get consistent output from systems that
122    differ in their output from isprint() even in the "C" locale. */
123    
124    #include "pcre_printint.src"
125    
126    #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
127    
 #define PCRE_SPY        /* For Win32 build, import data, not export */  
 #include "internal.h"  
128    
129  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
130  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 27  Makefile. */ Line 134  Makefile. */
134  #include "pcreposix.h"  #include "pcreposix.h"
135  #endif  #endif
136    
137    /* It is also possible, for the benefit of the version currently imported into
138    Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
139    interface to the DFA matcher (NODFA), and without the doublecheck of the old
140    "info" function (define NOINFOCHECK). In fact, we automatically cut out the
141    UTF8 support if PCRE is built without it. */
142    
143    #ifndef SUPPORT_UTF8
144    #ifndef NOUTF8
145    #define NOUTF8
146    #endif
147    #endif
148    
149    
150    /* Other parameters */
151    
152  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
153  #ifdef CLK_TCK  #ifdef CLK_TCK
154  #define CLOCKS_PER_SEC CLK_TCK  #define CLOCKS_PER_SEC CLK_TCK
# Line 35  Makefile. */ Line 157  Makefile. */
157  #endif  #endif
158  #endif  #endif
159    
160  #define LOOPREPEAT 50000  /* This is the default loop count for timing. */
161    
162    #define LOOPREPEAT 500000
163    
164    /* Static variables */
165    
166  static FILE *outfile;  static FILE *outfile;
167  static int log_store = 0;  static int log_store = 0;
# Line 44  static int callout_count; Line 169  static int callout_count;
169  static int callout_extra;  static int callout_extra;
170  static int callout_fail_count;  static int callout_fail_count;
171  static int callout_fail_id;  static int callout_fail_id;
172    static int debug_lengths;
173  static int first_callout;  static int first_callout;
174  static int utf8;  static int locale_set = 0;
175    static int show_malloc;
176    static int use_utf8;
177  static size_t gotten_store;  static size_t gotten_store;
178    
179    /* The buffers grow automatically if very long input lines are encountered. */
180    
181    static int buffer_size = 50000;
182    static uschar *buffer = NULL;
183    static uschar *dbuffer = NULL;
184    static uschar *pbuffer = NULL;
185    
 static int utf8_table1[] = {  
   0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};  
186    
 static int utf8_table2[] = {  
   0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};  
187    
188  static int utf8_table3[] = {  /*************************************************
189    0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};  *        Read or extend an input line            *
190    *************************************************/
191    
192    /* Input lines are read into buffer, but both patterns and data lines can be
193    continued over multiple input lines. In addition, if the buffer fills up, we
194    want to automatically expand it so as to be able to handle extremely large
195    lines that are needed for certain stress tests. When the input buffer is
196    expanded, the other two buffers must also be expanded likewise, and the
197    contents of pbuffer, which are a copy of the input for callouts, must be
198    preserved (for when expansion happens for a data line). This is not the most
199    optimal way of handling this, but hey, this is just a test program!
200    
201    Arguments:
202      f            the file to read
203      start        where in buffer to start (this *must* be within buffer)
204      prompt       for stdin or readline()
205    
206    Returns:       pointer to the start of new data
207                   could be a copy of start, or could be moved
208                   NULL if no data read and EOF reached
209    */
210    
211    static uschar *
212    extend_inputline(FILE *f, uschar *start, const char *prompt)
213    {
214    uschar *here = start;
215    
216    for (;;)
217      {
218      int rlen = buffer_size - (here - buffer);
219    
220      if (rlen > 1000)
221        {
222        int dlen;
223    
224        /* If libreadline support is required, use readline() to read a line if the
225        input is a terminal. Note that readline() removes the trailing newline, so
226        we must put it back again, to be compatible with fgets(). */
227    
228    #ifdef SUPPORT_LIBREADLINE
229        if (isatty(fileno(f)))
230          {
231          size_t len;
232          char *s = readline(prompt);
233          if (s == NULL) return (here == start)? NULL : start;
234          len = strlen(s);
235          if (len > 0) add_history(s);
236          if (len > rlen - 1) len = rlen - 1;
237          memcpy(here, s, len);
238          here[len] = '\n';
239          here[len+1] = 0;
240          free(s);
241          }
242        else
243    #endif
244    
245        /* Read the next line by normal means, prompting if the file is stdin. */
246    
247          {
248          if (f == stdin) printf(prompt);
249          if (fgets((char *)here, rlen,  f) == NULL)
250            return (here == start)? NULL : start;
251          }
252    
253        dlen = (int)strlen((char *)here);
254        if (dlen > 0 && here[dlen - 1] == '\n') return start;
255        here += dlen;
256        }
257    
258      else
259        {
260        int new_buffer_size = 2*buffer_size;
261        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
262        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
263        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
264    
265        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
266          {
267          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
268          exit(1);
269          }
270    
271        memcpy(new_buffer, buffer, buffer_size);
272        memcpy(new_pbuffer, pbuffer, buffer_size);
273    
274        buffer_size = new_buffer_size;
275    
276        start = new_buffer + (start - buffer);
277        here = new_buffer + (here - buffer);
278    
279        free(buffer);
280        free(dbuffer);
281        free(pbuffer);
282    
283        buffer = new_buffer;
284        dbuffer = new_dbuffer;
285        pbuffer = new_pbuffer;
286        }
287      }
288    
289    return NULL;  /* Control never gets here */
290    }
291    
 /*************************************************  
 *         Print compiled regex                   *  
 *************************************************/  
292    
 /* The code for doing this is held in a separate file that is also included in  
 pcre.c when it is compiled with the debug switch. It defines a function called  
 print_internals(), which uses a table of opcode lengths defined by the macro  
 OP_LENGTHS, whose name must be OP_lengths. */  
293    
 static uschar OP_lengths[] = { OP_LENGTHS };  
294    
 #include "printint.c"  
295    
296    
297    
# Line 82  static uschar OP_lengths[] = { OP_LENGTH Line 301  static uschar OP_lengths[] = { OP_LENGTH
301    
302  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
303  around with conditional compilation, just do the job by hand. It is only used  around with conditional compilation, just do the job by hand. It is only used
304  for unpicking the -o argument, so just keep it simple.  for unpicking arguments, so just keep it simple.
305    
306  Arguments:  Arguments:
307    str           string to be converted    str           string to be converted
# Line 103  return(result); Line 322  return(result);
322    
323    
324    
 /*************************************************  
 *       Convert character value to UTF-8         *  
 *************************************************/  
   
 /* This function takes an integer value in the range 0 - 0x7fffffff  
 and encodes it as a UTF-8 character in 0 to 6 bytes.  
   
 Arguments:  
   cvalue     the character value  
   buffer     pointer to buffer for result - at least 6 bytes long  
   
 Returns:     number of characters placed in the buffer  
              -1 if input character is negative  
              0 if input character is positive but too big (only when  
              int is longer than 32 bits)  
 */  
   
 static int  
 ord2utf8(int cvalue, unsigned char *buffer)  
 {  
 register int i, j;  
 for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)  
   if (cvalue <= utf8_table1[i]) break;  
 if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;  
 if (cvalue < 0) return -1;  
   
 buffer += i;  
 for (j = i; j > 0; j--)  
  {  
  *buffer-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *buffer = utf8_table2[i] | cvalue;  
 return i + 1;  
 }  
   
325    
326  /*************************************************  /*************************************************
327  *            Convert UTF-8 string to value       *  *            Convert UTF-8 string to value       *
# Line 148  return i + 1; Line 331  return i + 1;
331  and returns the value of the character.  and returns the value of the character.
332    
333  Argument:  Argument:
334    buffer   a pointer to the byte vector    utf8bytes   a pointer to the byte vector
335    vptr     a pointer to an int to receive the value    vptr        a pointer to an int to receive the value
336    
337  Returns:   >  0 => the number of bytes consumed  Returns:      >  0 => the number of bytes consumed
338             -6 to 0 => malformed UTF-8 character at offset = (-return)                -6 to 0 => malformed UTF-8 character at offset = (-return)
339  */  */
340    
341  int  #if !defined NOUTF8
342  utf82ord(unsigned char *buffer, int *vptr)  
343    static int
344    utf82ord(unsigned char *utf8bytes, int *vptr)
345  {  {
346  int c = *buffer++;  int c = *utf8bytes++;
347  int d = c;  int d = c;
348  int i, j, s;  int i, j, s;
349    
# Line 178  d = (c & utf8_table3[i]) << s; Line 363  d = (c & utf8_table3[i]) << s;
363    
364  for (j = 0; j < i; j++)  for (j = 0; j < i; j++)
365    {    {
366    c = *buffer++;    c = *utf8bytes++;
367    if ((c & 0xc0) != 0x80) return -(j+1);    if ((c & 0xc0) != 0x80) return -(j+1);
368    s -= 6;    s -= 6;
369    d |= (c & 0x3f) << s;    d |= (c & 0x3f) << s;
# Line 186  for (j = 0; j < i; j++) Line 371  for (j = 0; j < i; j++)
371    
372  /* Check that encoding was the correct unique one */  /* Check that encoding was the correct unique one */
373    
374  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)  for (j = 0; j < utf8_table1_size; j++)
375    if (d <= utf8_table1[j]) break;    if (d <= utf8_table1[j]) break;
376  if (j != i) return -(i+1);  if (j != i) return -(i+1);
377    
# Line 196  if (j != i) return -(i+1); Line 381  if (j != i) return -(i+1);
381  return i+1;  return i+1;
382  }  }
383    
384    #endif
385    
386    
387    
388    /*************************************************
389    *       Convert character value to UTF-8         *
390    *************************************************/
391    
392    /* This function takes an integer value in the range 0 - 0x7fffffff
393    and encodes it as a UTF-8 character in 0 to 6 bytes.
394    
395    Arguments:
396      cvalue     the character value
397      utf8bytes  pointer to buffer for result - at least 6 bytes long
398    
399    Returns:     number of characters placed in the buffer
400    */
401    
402    #if !defined NOUTF8
403    
404    static int
405    ord2utf8(int cvalue, uschar *utf8bytes)
406    {
407    register int i, j;
408    for (i = 0; i < utf8_table1_size; i++)
409      if (cvalue <= utf8_table1[i]) break;
410    utf8bytes += i;
411    for (j = i; j > 0; j--)
412     {
413     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
414     cvalue >>= 6;
415     }
416    *utf8bytes = utf8_table2[i] | cvalue;
417    return i + 1;
418    }
419    
420    #endif
421    
422    
423    
424  /*************************************************  /*************************************************
# Line 208  chars without printing. */ Line 431  chars without printing. */
431    
432  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars(unsigned char *p, int length, FILE *f)
433  {  {
434  int c;  int c = 0;
435  int yield = 0;  int yield = 0;
436    
437  while (length-- > 0)  while (length-- > 0)
438    {    {
439    if (utf8)  #if !defined NOUTF8
440      if (use_utf8)
441      {      {
442      int rc = utf82ord(p, &c);      int rc = utf82ord(p, &c);
443    
# Line 221  while (length-- > 0) Line 445  while (length-- > 0)
445        {        {
446        length -= rc - 1;        length -= rc - 1;
447        p += rc;        p += rc;
448        if (c < 256 && isprint(c))        if (PRINTHEX(c))
449          {          {
450          if (f != NULL) fprintf(f, "%c", c);          if (f != NULL) fprintf(f, "%c", c);
451          yield++;          yield++;
452          }          }
453        else        else
454          {          {
455          int n;          int n = 4;
456          if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);          if (f != NULL) fprintf(f, "\\x{%02x}", c);
457          yield += n;          yield += (n <= 0x000000ff)? 2 :
458                     (n <= 0x00000fff)? 3 :
459                     (n <= 0x0000ffff)? 4 :
460                     (n <= 0x000fffff)? 5 : 6;
461          }          }
462        continue;        continue;
463        }        }
464      }      }
465    #endif
466    
467     /* Not UTF-8, or malformed UTF-8  */     /* Not UTF-8, or malformed UTF-8  */
468    
469    if (isprint(c = *(p++)))    c = *p++;
470      if (PRINTHEX(c))
471      {      {
472      if (f != NULL) fprintf(f, "%c", c);      if (f != NULL) fprintf(f, "%c", c);
473      yield++;      yield++;
# Line 266  data is not zero. */ Line 495  data is not zero. */
495  static int callout(pcre_callout_block *cb)  static int callout(pcre_callout_block *cb)
496  {  {
497  FILE *f = (first_callout | callout_extra)? outfile : NULL;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
498  int i, pre_start, post_start;  int i, pre_start, post_start, subject_length;
499    
500  if (callout_extra)  if (callout_extra)
501    {    {
   int i;  
502    fprintf(f, "Callout %d: last capture = %d\n",    fprintf(f, "Callout %d: last capture = %d\n",
503      cb->callout_number, cb->capture_last);      cb->callout_number, cb->capture_last);
504    
# Line 298  pre_start = pchars((unsigned char *)cb-> Line 526  pre_start = pchars((unsigned char *)cb->
526  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
527    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
528    
529    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
530    
531  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  (void)pchars((unsigned char *)(cb->subject + cb->current_position),
532    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
533    
534  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
535    
536  /* Always print appropriate indicators, with callout number if not already  /* Always print appropriate indicators, with callout number if not already
537  shown */  shown. For automatic callouts, show the pattern offset. */
538    
539  if (callout_extra) fprintf(outfile, "    ");  if (cb->callout_number == 255)
540    else fprintf(outfile, "%3d ", cb->callout_number);    {
541      fprintf(outfile, "%+3d ", cb->pattern_position);
542      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
543      }
544    else
545      {
546      if (callout_extra) fprintf(outfile, "    ");
547        else fprintf(outfile, "%3d ", cb->callout_number);
548      }
549    
550  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");  for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
551  fprintf(outfile, "^");  fprintf(outfile, "^");
# Line 318  if (post_start > 0) Line 556  if (post_start > 0)
556    fprintf(outfile, "^");    fprintf(outfile, "^");
557    }    }
558    
559  fprintf(outfile, "\n");  for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
560      fprintf(outfile, " ");
561    
562    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
563      pbuffer + cb->pattern_position);
564    
565    fprintf(outfile, "\n");
566  first_callout = 0;  first_callout = 0;
567    
568  if ((int)(cb->callout_data) != 0)  if (cb->callout_data != NULL)
569    {    {
570    fprintf(outfile, "Callout data = %d\n", (int)(cb->callout_data));    int callout_data = *((int *)(cb->callout_data));
571    return (int)(cb->callout_data);    if (callout_data != 0)
572        {
573        fprintf(outfile, "Callout data = %d\n", callout_data);
574        return callout_data;
575        }
576    }    }
577    
578  return (cb->callout_number != callout_fail_id)? 0 :  return (cb->callout_number != callout_fail_id)? 0 :
# Line 334  return (cb->callout_number != callout_fa Line 581  return (cb->callout_number != callout_fa
581    
582    
583  /*************************************************  /*************************************************
584  *            Local malloc function               *  *            Local malloc functions              *
585  *************************************************/  *************************************************/
586    
587  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
# Line 342  compiled re. */ Line 589  compiled re. */
589    
590  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
591  {  {
592    void *block = malloc(size);
593  gotten_store = size;  gotten_store = size;
594  return malloc(size);  if (show_malloc)
595      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
596    return block;
597  }  }
598    
599    static void new_free(void *block)
600    {
601    if (show_malloc)
602      fprintf(outfile, "free             %p\n", block);
603    free(block);
604    }
605    
606    
607    /* For recursion malloc/free, to test stacking calls */
608    
609    static void *stack_malloc(size_t size)
610    {
611    void *block = malloc(size);
612    if (show_malloc)
613      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
614    return block;
615    }
616    
617    static void stack_free(void *block)
618    {
619    if (show_malloc)
620      fprintf(outfile, "stack_free       %p\n", block);
621    free(block);
622    }
623    
624    
625  /*************************************************  /*************************************************
# Line 364  if ((rc = pcre_fullinfo(re, study, optio Line 638  if ((rc = pcre_fullinfo(re, study, optio
638    
639    
640  /*************************************************  /*************************************************
641    *         Byte flipping function                 *
642    *************************************************/
643    
/* Reverse the byte order of the low-order bytes of a value.

Arguments:
  value     the value to be flipped
  n         2 to swap the two low-order bytes; any other value reverses the
              four low-order bytes

Returns:    the flipped value; bits above the bytes involved are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
653    
654    
655    
656    
657    /*************************************************
658    *        Check match or recursion limit          *
659    *************************************************/
660    
661    static int
662    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
663      int start_offset, int options, int *use_offsets, int use_size_offsets,
664      int flag, unsigned long int *limit, int errnumber, const char *msg)
665    {
666    int count;
667    int min = 0;
668    int mid = 64;
669    int max = -1;
670    
671    extra->flags |= flag;
672    
673    for (;;)
674      {
675      *limit = mid;
676    
677      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
678        use_offsets, use_size_offsets);
679    
680      if (count == errnumber)
681        {
682        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
683        min = mid;
684        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
685        }
686    
687      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
688                             count == PCRE_ERROR_PARTIAL)
689        {
690        if (mid == min + 1)
691          {
692          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
693          break;
694          }
695        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
696        max = mid;
697        mid = (min + mid)/2;
698        }
699      else break;    /* Some other error */
700      }
701    
702    extra->flags &= ~flag;
703    return count;
704    }
705    
706    
707    
708    /*************************************************
709    *         Case-independent strncmp() function    *
710    *************************************************/
711    
712    /*
713    Arguments:
714      s         first string
715      t         second string
716      n         number of characters to compare
717    
718    Returns:    < 0, = 0, or > 0, according to the comparison
719    */
720    
721    static int
722    strncmpic(uschar *s, uschar *t, int n)
723    {
724    while (n--)
725      {
726      int c = tolower(*s++) - tolower(*t++);
727      if (c) return c;
728      }
729    return 0;
730    }
731    
732    
733    
734    /*************************************************
735    *         Check newline indicator                *
736    *************************************************/
737    
738    /* This is used both at compile and run-time to check for <xxx> escapes, where
739    xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
740    no match.
741    
742    Arguments:
743      p           points after the leading '<'
744      f           file for error message
745    
746    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
747    */
748    
749    static int
750    check_newline(uschar *p, FILE *f)
751    {
752    if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
753    if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
754    if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
755    if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
756    if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
757    if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
758    if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
759    fprintf(f, "Unknown newline type at: <%s\n", p);
760    return 0;
761    }
762    
763    
764    
765    /*************************************************
766    *             Usage function                     *
767    *************************************************/
768    
/* Write the command-line help text to stdout. The lines about readline(),
-dfa and -p depend on SUPPORT_LIBREADLINE, NODFA and NOPOSIX respectively. */

static void
usage(void)
{
fputs("Usage:     pcretest [options] [<input file> [<output file>]]\n\n"
      "Input and output default to stdin and stdout.\n", stdout);

#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif

fputs("\nOptions:\n"
      "  -b       show compiled code (bytecode)\n"
      "  -C       show PCRE compile-time options and exit\n"
      "  -d       debug: show compiled code and information (-b and -i)\n",
      stdout);

#if !defined NODFA
fputs("  -dfa     force DFA matching for all subjects\n", stdout);
#endif

fputs("  -help    show usage information\n"
      "  -i       show information about compiled patterns\n"
      "  -M       find MATCH_LIMIT minimum for each subject\n"
      "  -m       output memory used information\n"
      "  -o <n>   set size of offsets vector to <n>\n", stdout);

#if !defined NOPOSIX
fputs("  -p       use POSIX interface\n", stdout);
#endif

fputs("  -q       quiet: do not output PCRE version number at start\n"
      "  -S <n>   set stack size to <n> megabytes\n"
      "  -s       output store (memory) used information\n"
      "  -t       time compilation and execution\n"
      "  -t <n>   time compilation and execution, repeating <n> times\n"
      "  -tm      time execution (matching) only\n"
      "  -tm <n>  time execution (matching) only, repeating <n> times\n",
      stdout);
}
802    
803    
804    
805    /*************************************************
806  *                Main Program                    *  *                Main Program                    *
807  *************************************************/  *************************************************/
808    
# Line 376  int main(int argc, char **argv) Line 815  int main(int argc, char **argv)
815  FILE *infile = stdin;  FILE *infile = stdin;
816  int options = 0;  int options = 0;
817  int study_options = 0;  int study_options = 0;
818    int default_find_match_limit = FALSE;
819  int op = 1;  int op = 1;
820  int timeit = 0;  int timeit = 0;
821    int timeitm = 0;
822  int showinfo = 0;  int showinfo = 0;
823  int showstore = 0;  int showstore = 0;
824    int quiet = 0;
825  int size_offsets = 45;  int size_offsets = 45;
826  int size_offsets_max;  int size_offsets_max;
827  int *offsets;  int *offsets = NULL;
828  #if !defined NOPOSIX  #if !defined NOPOSIX
829  int posix = 0;  int posix = 0;
830  #endif  #endif
831  int debug = 0;  int debug = 0;
832  int done = 0;  int done = 0;
833  unsigned char buffer[30000];  int all_use_dfa = 0;
834  unsigned char dbuffer[1024];  int yield = 0;
835    int stack_size;
836    
837    /* These vectors store, end-to-end, a list of captured substring names. Assume
838    that 1024 is plenty long enough for the few names we'll be testing. */
839    
840    uschar copynames[1024];
841    uschar getnames[1024];
842    
843  /* Static so that new_malloc can use it. */  uschar *copynamesptr;
844    uschar *getnamesptr;
845    
846    /* Get buffers from malloc() so that Electric Fence will check their misuse
847    when I am debugging. They grow automatically when very long lines are read. */
848    
849    buffer = (unsigned char *)malloc(buffer_size);
850    dbuffer = (unsigned char *)malloc(buffer_size);
851    pbuffer = (unsigned char *)malloc(buffer_size);
852    
853    /* The outfile variable is static so that new_malloc can use it. */
854    
855  outfile = stdout;  outfile = stdout;
856    
857    /* The following  _setmode() stuff is some Windows magic that tells its runtime
858    library to translate CRLF into a single LF character. At least, that's what
859    I've been told: never having used Windows I take this all on trust. Originally
860    it set 0x8000, but then I was advised that _O_BINARY was better. */
861    
862    #if defined(_WIN32) || defined(WIN32)
863    _setmode( _fileno( stdout ), _O_BINARY );
864    #endif
865    
866  /* Scan options */  /* Scan options */
867    
868  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
# Line 403  while (argc > 1 && argv[op][0] == '-') Line 871  while (argc > 1 && argv[op][0] == '-')
871    
872    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
873      showstore = 1;      showstore = 1;
874    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
875      else if (strcmp(argv[op], "-b") == 0) debug = 1;
876    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
877    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
878      else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
879    #if !defined NODFA
880      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
881    #endif
882    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
883        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
884          *endptr == 0))          *endptr == 0))
# Line 413  while (argc > 1 && argv[op][0] == '-') Line 886  while (argc > 1 && argv[op][0] == '-')
886      op++;      op++;
887      argc--;      argc--;
888      }      }
889      else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
890        {
891        int both = argv[op][2] == 0;
892        int temp;
893        if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
894                         *endptr == 0))
895          {
896          timeitm = temp;
897          op++;
898          argc--;
899          }
900        else timeitm = LOOPREPEAT;
901        if (both) timeit = timeitm;
902        }
903      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
904          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
905            *endptr == 0))
906        {
907    #if defined(_WIN32) || defined(WIN32)
908        printf("PCRE: -S not supported on this OS\n");
909        exit(1);
910    #else
911        int rc;
912        struct rlimit rlim;
913        getrlimit(RLIMIT_STACK, &rlim);
914        rlim.rlim_cur = stack_size * 1024 * 1024;
915        rc = setrlimit(RLIMIT_STACK, &rlim);
916        if (rc != 0)
917          {
918        printf("PCRE: setrlimit() failed with error %d\n", rc);
919        exit(1);
920          }
921        op++;
922        argc--;
923    #endif
924        }
925  #if !defined NOPOSIX  #if !defined NOPOSIX
926    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
927  #endif  #endif
928    else if (strcmp(argv[op], "-C") == 0)    else if (strcmp(argv[op], "-C") == 0)
929      {      {
930      int rc;      int rc;
931        unsigned long int lrc;
932      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
933      printf("Compiled with\n");      printf("Compiled with\n");
934      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
935      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
936        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
937        printf("  %sUnicode properties support\n", rc? "" : "No ");
938      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);      (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
939      printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");      /* Note that these values are always the ASCII values, even
940        in EBCDIC environments. CR is 13 and NL is 10. */
941        printf("  Newline sequence is %s\n", (rc == 13)? "CR" :
942          (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
943          (rc == -2)? "ANYCRLF" :
944          (rc == -1)? "ANY" : "???");
945        (void)pcre_config(PCRE_CONFIG_BSR, &rc);
946        printf("  \\R matches %s\n", rc? "CR, LF, or CRLF only" :
947                                         "all Unicode newlines");
948      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);      (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
949      printf("  Internal link size = %d\n", rc);      printf("  Internal link size = %d\n", rc);
950      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);      (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
951      printf("  POSIX malloc threshold = %d\n", rc);      printf("  POSIX malloc threshold = %d\n", rc);
952      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
953      printf("  Default match limit = %d\n", rc);      printf("  Default match limit = %ld\n", lrc);
954      exit(0);      (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
955        printf("  Default recursion depth limit = %ld\n", lrc);
956        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
957        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
958        goto EXIT;
959        }
960      else if (strcmp(argv[op], "-help") == 0 ||
961               strcmp(argv[op], "--help") == 0)
962        {
963        usage();
964        goto EXIT;
965      }      }
966    else    else
967      {      {
968      printf("** Unknown or malformed option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
969      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");      usage();
970      printf("  -C     show PCRE compile-time options and exit\n");      yield = 1;
971      printf("  -d     debug: show compiled code; implies -i\n"      goto EXIT;
            "  -i     show information about compiled pattern\n"  
            "  -o <n> set size of offsets vector to <n>\n");  
 #if !defined NOPOSIX  
     printf("  -p     use POSIX interface\n");  
 #endif  
     printf("  -s     output store information\n"  
            "  -t     time compilation and execution\n");  
     return 1;  
972      }      }
973    op++;    op++;
974    argc--;    argc--;
# Line 455  while (argc > 1 && argv[op][0] == '-') Line 977  while (argc > 1 && argv[op][0] == '-')
977  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
978    
979  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
980  offsets = malloc(size_offsets_max * sizeof(int));  offsets = (int *)malloc(size_offsets_max * sizeof(int));
981  if (offsets == NULL)  if (offsets == NULL)
982    {    {
983    printf("** Failed to get %d bytes of memory for offsets vector\n",    printf("** Failed to get %d bytes of memory for offsets vector\n",
984      size_offsets_max * sizeof(int));      (int)(size_offsets_max * sizeof(int)));
985    return 1;    yield = 1;
986      goto EXIT;
987    }    }
988    
989  /* Sort out the input and output files */  /* Sort out the input and output files */
990    
991  if (argc > 1)  if (argc > 1)
992    {    {
993    infile = fopen(argv[op], "r");    infile = fopen(argv[op], INPUT_MODE);
994    if (infile == NULL)    if (infile == NULL)
995      {      {
996      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
997      return 1;      yield = 1;
998        goto EXIT;
999      }      }
1000    }    }
1001    
1002  if (argc > 2)  if (argc > 2)
1003    {    {
1004    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], OUTPUT_MODE);
1005    if (outfile == NULL)    if (outfile == NULL)
1006      {      {
1007      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
1008      return 1;      yield = 1;
1009        goto EXIT;
1010      }      }
1011    }    }
1012    
1013  /* Set alternative malloc function */  /* Set alternative malloc function */
1014    
1015  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
1016    pcre_free = new_free;
1017    pcre_stack_malloc = stack_malloc;
1018    pcre_stack_free = stack_free;
1019    
1020  /* Heading line, then prompt for first regex if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
1021    
1022  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1023    
1024  /* Main loop */  /* Main loop */
1025    
# Line 507  while (!done) Line 1035  while (!done)
1035    
1036    const char *error;    const char *error;
1037    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
1038      unsigned char *to_file = NULL;
1039    const unsigned char *tables = NULL;    const unsigned char *tables = NULL;
1040      unsigned long int true_size, true_study_size = 0;
1041      size_t size, regex_gotten_store;
1042    int do_study = 0;    int do_study = 0;
1043    int do_debug = debug;    int do_debug = debug;
1044    int do_G = 0;    int do_G = 0;
1045    int do_g = 0;    int do_g = 0;
1046    int do_showinfo = showinfo;    int do_showinfo = showinfo;
1047    int do_showrest = 0;    int do_showrest = 0;
1048    int erroroffset, len, delimiter;    int do_flip = 0;
1049      int erroroffset, len, delimiter, poffset;
1050    
1051    utf8 = 0;    use_utf8 = 0;
1052      debug_lengths = 1;
1053    
1054    if (infile == stdin) printf("  re> ");    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
   if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;  
1055    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1056    fflush(outfile);    fflush(outfile);
1057    
# Line 527  while (!done) Line 1059  while (!done)
1059    while (isspace(*p)) p++;    while (isspace(*p)) p++;
1060    if (*p == 0) continue;    if (*p == 0) continue;
1061    
1062    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
1063    complete, read more. */  
1064      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1065        {
1066        unsigned long int magic, get_options;
1067        uschar sbuf[8];
1068        FILE *f;
1069    
1070        p++;
1071        pp = p + (int)strlen((char *)p);
1072        while (isspace(pp[-1])) pp--;
1073        *pp = 0;
1074    
1075        f = fopen((char *)p, "rb");
1076        if (f == NULL)
1077          {
1078          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1079          continue;
1080          }
1081    
1082        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1083    
1084        true_size =
1085          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1086        true_study_size =
1087          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1088    
1089        re = (real_pcre *)new_malloc(true_size);
1090        regex_gotten_store = gotten_store;
1091    
1092        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1093    
1094        magic = ((real_pcre *)re)->magic_number;
1095        if (magic != MAGIC_NUMBER)
1096          {
1097          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1098            {
1099            do_flip = 1;
1100            }
1101          else
1102            {
1103            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1104            fclose(f);
1105            continue;
1106            }
1107          }
1108    
1109        fprintf(outfile, "Compiled regex%s loaded from %s\n",
1110          do_flip? " (byte-inverted)" : "", p);
1111    
1112        /* Need to know if UTF-8 for printing data strings */
1113    
1114        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1115        use_utf8 = (get_options & PCRE_UTF8) != 0;
1116    
1117        /* Now see if there is any following study data */
1118    
1119        if (true_study_size != 0)
1120          {
1121          pcre_study_data *psd;
1122    
1123          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1124          extra->flags = PCRE_EXTRA_STUDY_DATA;
1125    
1126          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1127          extra->study_data = psd;
1128    
1129          if (fread(psd, 1, true_study_size, f) != true_study_size)
1130            {
1131            FAIL_READ:
1132            fprintf(outfile, "Failed to read data from %s\n", p);
1133            if (extra != NULL) new_free(extra);
1134            if (re != NULL) new_free(re);
1135            fclose(f);
1136            continue;
1137            }
1138          fprintf(outfile, "Study data loaded from %s\n", p);
1139          do_study = 1;     /* To get the data output if requested */
1140          }
1141        else fprintf(outfile, "No study data\n");
1142    
1143        fclose(f);
1144        goto SHOW_INFO;
1145        }
1146    
1147      /* In-line pattern (the usual case). Get the delimiter and seek the end of
1148      the pattern; if it isn't complete, read more. */
1149    
1150    delimiter = *p++;    delimiter = *p++;
1151    
1152    if (isalnum(delimiter) || delimiter == '\\')    if (isalnum(delimiter) || delimiter == '\\')
1153      {      {
1154      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");      fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1155      goto SKIP_DATA;      goto SKIP_DATA;
1156      }      }
1157    
1158    pp = p;    pp = p;
1159      poffset = p - buffer;
1160    
1161    for(;;)    for(;;)
1162      {      {
# Line 549  while (!done) Line 1167  while (!done)
1167        pp++;        pp++;
1168        }        }
1169      if (*pp != 0) break;      if (*pp != 0) break;
1170        if ((pp = extend_inputline(infile, pp, "    > ")) == NULL)
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
       {  
       fprintf(outfile, "** Expression too long - missing delimiter?\n");  
       goto SKIP_DATA;  
       }  
   
     if (infile == stdin) printf("    > ");  
     if (fgets((char *)pp, len, infile) == NULL)  
1171        {        {
1172        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
1173        done = 1;        done = 1;
# Line 567  while (!done) Line 1176  while (!done)
1176      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1177      }      }
1178    
1179      /* The buffer may have moved while being extended; reset the start of data
1180      pointer to the correct relative point in the buffer. */
1181    
1182      p = buffer + poffset;
1183    
1184    /* If the first character after the delimiter is backslash, make    /* If the first character after the delimiter is backslash, make
1185    the pattern end with backslash. This is purely to provide a way    the pattern end with backslash. This is purely to provide a way
1186    of testing for the error message when a pattern ends with backslash. */    of testing for the error message when a pattern ends with backslash. */
1187    
1188    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
1189    
1190    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
1191      for callouts. */
1192    
1193    *pp++ = 0;    *pp++ = 0;
1194      strcpy((char *)pbuffer, (char *)p);
1195    
1196    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1197    
# Line 587  while (!done) Line 1203  while (!done)
1203      {      {
1204      switch (*pp++)      switch (*pp++)
1205        {        {
1206          case 'f': options |= PCRE_FIRSTLINE; break;
1207        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
1208        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1209        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 595  while (!done) Line 1212  while (!done)
1212    
1213        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
1214        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1215          case 'B': do_debug = 1; break;
1216          case 'C': options |= PCRE_AUTO_CALLOUT; break;
1217        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
1218        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1219          case 'F': do_flip = 1; break;
1220        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
1221        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
1222          case 'J': options |= PCRE_DUPNAMES; break;
1223        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
1224        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;        case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1225    
# Line 609  while (!done) Line 1230  while (!done)
1230        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1231        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
1232        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1233        case '8': options |= PCRE_UTF8; utf8 = 1; break;        case 'Z': debug_lengths = 0; break;
1234          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1235          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1236    
1237        case 'L':        case 'L':
1238        ppp = pp;        ppp = pp;
1239        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows. */
1240          /* The 0 (NUL) test is just in case this is an unterminated line. */
1241          while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1242        *ppp = 0;        *ppp = 0;
1243        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1244          {          {
1245          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);          fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1246          goto SKIP_DATA;          goto SKIP_DATA;
1247          }          }
1248          locale_set = 1;
1249        tables = pcre_maketables();        tables = pcre_maketables();
1250        pp = ppp;        pp = ppp;
1251        break;        break;
1252    
1253        case '\n': case ' ': break;        case '>':
1254          to_file = pp;
1255          while (*pp != 0) pp++;
1256          while (isspace(pp[-1])) pp--;
1257          *pp = 0;
1258          break;
1259    
1260          case '<':
1261            {
1262            if (strncmp((char *)pp, "JS>", 3) == 0)
1263              {
1264              options |= PCRE_JAVASCRIPT_COMPAT;
1265              pp += 3;
1266              }
1267            else
1268              {
1269              int x = check_newline(pp, outfile);
1270              if (x == 0) goto SKIP_DATA;
1271              options |= x;
1272              while (*pp++ != '>');
1273              }
1274            }
1275          break;
1276    
1277          case '\r':                      /* So that it works in Windows */
1278          case '\n':
1279          case ' ':
1280          break;
1281    
1282        default:        default:
1283        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1284        goto SKIP_DATA;        goto SKIP_DATA;
# Line 640  while (!done) Line 1294  while (!done)
1294      {      {
1295      int rc;      int rc;
1296      int cflags = 0;      int cflags = 0;
1297    
1298      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1299      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1300        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1301        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1302        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1303    
1304      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1305    
1306      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 649  while (!done) Line 1308  while (!done)
1308    
1309      if (rc != 0)      if (rc != 0)
1310        {        {
1311        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1312        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1313        goto SKIP_DATA;        goto SKIP_DATA;
1314        }        }
# Line 661  while (!done) Line 1320  while (!done)
1320  #endif  /* !defined NOPOSIX */  #endif  /* !defined NOPOSIX */
1321    
1322      {      {
1323      if (timeit)      if (timeit > 0)
1324        {        {
1325        register int i;        register int i;
1326        clock_t time_taken;        clock_t time_taken;
1327        clock_t start_time = clock();        clock_t start_time = clock();
1328        for (i = 0; i < LOOPREPEAT; i++)        for (i = 0; i < timeit; i++)
1329          {          {
1330          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1331          if (re != NULL) free(re);          if (re != NULL) free(re);
1332          }          }
1333        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1334        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.4f milliseconds\n",
1335          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /          (((double)time_taken * 1000.0) / (double)timeit) /
1336            (double)CLOCKS_PER_SEC);            (double)CLOCKS_PER_SEC);
1337        }        }
1338    
# Line 690  while (!done) Line 1349  while (!done)
1349          {          {
1350          for (;;)          for (;;)
1351            {            {
1352            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer, NULL) == NULL)
1353              {              {
1354              done = 1;              done = 1;
1355              goto CONTINUE;              goto CONTINUE;
# Line 714  while (!done) Line 1373  while (!done)
1373                sizeof(real_pcre) -                sizeof(real_pcre) -
1374                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1375    
1376        /* Extract the size for possible writing before possibly flipping it,
1377        and remember the store that was got. */
1378    
1379        true_size = ((real_pcre *)re)->size;
1380        regex_gotten_store = gotten_store;
1381    
1382        /* If /S was present, study the regexp to generate additional info to
1383        help with the matching. */
1384    
1385        if (do_study)
1386          {
1387          if (timeit > 0)
1388            {
1389            register int i;
1390            clock_t time_taken;
1391            clock_t start_time = clock();
1392            for (i = 0; i < timeit; i++)
1393              extra = pcre_study(re, study_options, &error);
1394            time_taken = clock() - start_time;
1395            if (extra != NULL) free(extra);
1396            fprintf(outfile, "  Study time %.4f milliseconds\n",
1397              (((double)time_taken * 1000.0) / (double)timeit) /
1398                (double)CLOCKS_PER_SEC);
1399            }
1400          extra = pcre_study(re, study_options, &error);
1401          if (error != NULL)
1402            fprintf(outfile, "Failed to study: %s\n", error);
1403          else if (extra != NULL)
1404            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1405          }
1406    
1407        /* If the 'F' option was present, we flip the bytes of all the integer
1408        fields in the regex data block and the study block. This is to make it
1409        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1410        compiled on a different architecture. */
1411    
1412        if (do_flip)
1413          {
1414          real_pcre *rre = (real_pcre *)re;
1415          rre->magic_number =
1416            byteflip(rre->magic_number, sizeof(rre->magic_number));
1417          rre->size = byteflip(rre->size, sizeof(rre->size));
1418          rre->options = byteflip(rre->options, sizeof(rre->options));
1419          rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1420          rre->top_bracket =
1421            (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1422          rre->top_backref =
1423            (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1424          rre->first_byte =
1425            (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1426          rre->req_byte =
1427            (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1428          rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1429            sizeof(rre->name_table_offset));
1430          rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1431            sizeof(rre->name_entry_size));
1432          rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1433            sizeof(rre->name_count));
1434    
1435          if (extra != NULL)
1436            {
1437            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1438            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1439            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1440            }
1441          }
1442    
1443        /* Extract information from the compiled data if required */
1444    
1445        SHOW_INFO:
1446    
1447        if (do_debug)
1448          {
1449          fprintf(outfile, "------------------------------------------------------------------\n");
1450          pcre_printint(re, outfile, debug_lengths);
1451          }
1452    
1453      if (do_showinfo)      if (do_showinfo)
1454        {        {
1455        unsigned long int get_options;        unsigned long int get_options, all_options;
1456    #if !defined NOINFOCHECK
1457        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1458        int count, backrefmax, first_char, need_char;  #endif
1459          int count, backrefmax, first_char, need_char, okpartial, jchanged,
1460            hascrorlf;
1461        int nameentrysize, namecount;        int nameentrysize, namecount;
1462        const uschar *nametable;        const uschar *nametable;
       size_t size;  
   
       if (do_debug)  
         {  
         fprintf(outfile, "------------------------------------------------------------------\n");  
         print_internals(re, outfile);  
         }  
1463    
1464        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1465        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
# Line 737  while (!done) Line 1469  while (!done)
1469        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1470        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);        new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1471        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);        new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1472        new_info(re, NULL, PCRE_INFO_NAMETABLE, &nametable);        new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1473          new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1474          new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1475          new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1476    
1477    #if !defined NOINFOCHECK
1478        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1479        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
1480          "Error %d from pcre_info()\n", count);          "Error %d from pcre_info()\n", count);
# Line 756  while (!done) Line 1492  while (!done)
1492            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1493              get_options, old_options);              get_options, old_options);
1494          }          }
1495    #endif
1496    
1497        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1498          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1499          size, gotten_store);          (int)size, (int)regex_gotten_store);
1500    
1501        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1502        if (backrefmax > 0)        if (backrefmax > 0)
# Line 777  while (!done) Line 1514  while (!done)
1514            }            }
1515          }          }
1516    
1517          if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1518          if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1519    
1520          all_options = ((real_pcre *)re)->options;
1521          if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1522    
1523        if (get_options == 0) fprintf(outfile, "No options\n");        if (get_options == 0) fprintf(outfile, "No options\n");
1524          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1525            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",            ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1526            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",            ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1527            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",            ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1528            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",            ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1529              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1530            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",            ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1531              ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1532              ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1533            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",            ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1534            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1535            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1536            ((get_options & PCRE_UTF8) != 0)? " utf8" : "");            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1537              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1538              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1539              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1540    
1541          if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1542    
1543          switch (get_options & PCRE_NEWLINE_BITS)
1544            {
1545            case PCRE_NEWLINE_CR:
1546            fprintf(outfile, "Forced newline sequence: CR\n");
1547            break;
1548    
1549        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)          case PCRE_NEWLINE_LF:
1550          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Forced newline sequence: LF\n");
1551            break;
1552    
1553            case PCRE_NEWLINE_CRLF:
1554            fprintf(outfile, "Forced newline sequence: CRLF\n");
1555            break;
1556    
1557            case PCRE_NEWLINE_ANYCRLF:
1558            fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1559            break;
1560    
1561            case PCRE_NEWLINE_ANY:
1562            fprintf(outfile, "Forced newline sequence: ANY\n");
1563            break;
1564    
1565            default:
1566            break;
1567            }
1568    
1569        if (first_char == -1)        if (first_char == -1)
1570          {          {
1571          fprintf(outfile, "First char at start or follows \\n\n");          fprintf(outfile, "First char at start or follows newline\n");
1572          }          }
1573        else if (first_char < 0)        else if (first_char < 0)
1574          {          {
# Line 803  while (!done) Line 1577  while (!done)
1577        else        else
1578          {          {
1579          int ch = first_char & 255;          int ch = first_char & 255;
1580          char *caseless = ((first_char & REQ_CASELESS) == 0)?          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1581            "" : " (caseless)";            "" : " (caseless)";
1582          if (isprint(ch))          if (PRINTHEX(ch))
1583            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1584          else          else
1585            fprintf(outfile, "First char = %d%s\n", ch, caseless);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
# Line 818  while (!done) Line 1592  while (!done)
1592        else        else
1593          {          {
1594          int ch = need_char & 255;          int ch = need_char & 255;
1595          char *caseless = ((need_char & REQ_CASELESS) == 0)?          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1596            "" : " (caseless)";            "" : " (caseless)";
1597          if (isprint(ch))          if (PRINTHEX(ch))
1598            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1599          else          else
1600            fprintf(outfile, "Need char = %d%s\n", ch, caseless);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1601          }          }
       }  
1602    
1603      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1604      help with the matching. */        value, but it varies, depending on the computer architecture, and
1605          so messes up the test suite. (And with the /F option, it might be
1606          flipped.) */
1607    
1608      if (do_study)        if (do_study)
       {  
       if (timeit)  
1609          {          {
1610          register int i;          if (extra == NULL)
1611          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1612          clock_t start_time = clock();          else
1613          for (i = 0; i < LOOPREPEAT; i++)            {
1614            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1615          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1616          if (extra != NULL) free(extra);  
1617          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1618            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /              fprintf(outfile, "No starting byte set\n");
1619              (double)CLOCKS_PER_SEC);            else
1620                {
1621                int i;
1622                int c = 24;
1623                fprintf(outfile, "Starting byte set: ");
1624                for (i = 0; i < 256; i++)
1625                  {
1626                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1627                    {
1628                    if (c > 75)
1629                      {
1630                      fprintf(outfile, "\n  ");
1631                      c = 2;
1632                      }
1633                    if (PRINTHEX(i) && i != ' ')
1634                      {
1635                      fprintf(outfile, "%c ", i);
1636                      c += 2;
1637                      }
1638                    else
1639                      {
1640                      fprintf(outfile, "\\x%02x ", i);
1641                      c += 5;
1642                      }
1643                    }
1644                  }
1645                fprintf(outfile, "\n");
1646                }
1647              }
1648          }          }
1649          }
1650    
1651        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1652        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1653          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1654    
1655        else if (do_showinfo)      if (to_file != NULL)
1656          {
1657          FILE *f = fopen((char *)to_file, "wb");
1658          if (f == NULL)
1659          {          {
1660          size_t size;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1661          uschar *start_bits = NULL;          }
1662          new_info(re, extra, PCRE_INFO_STUDYSIZE, &size);        else
1663          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          {
1664          fprintf(outfile, "Study size = %d\n", size);          uschar sbuf[8];
1665          if (start_bits == NULL)          sbuf[0] = (uschar)((true_size >> 24) & 255);
1666            fprintf(outfile, "No starting character set\n");          sbuf[1] = (uschar)((true_size >> 16) & 255);
1667            sbuf[2] = (uschar)((true_size >>  8) & 255);
1668            sbuf[3] = (uschar)((true_size) & 255);
1669    
1670            sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1671            sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1672            sbuf[6] = (uschar)((true_study_size >>  8) & 255);
1673            sbuf[7] = (uschar)((true_study_size) & 255);
1674    
1675            if (fwrite(sbuf, 1, 8, f) < 8 ||
1676                fwrite(re, 1, true_size, f) < true_size)
1677              {
1678              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1679              }
1680          else          else
1681            {            {
1682            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1683            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1684              {              {
1685              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1686                    true_study_size)
1687                {                {
1688                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1689                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1690                }                }
1691                else fprintf(outfile, "Study data written to %s\n", to_file);
1692    
1693              }              }
           fprintf(outfile, "\n");  
1694            }            }
1695            fclose(f);
1696          }          }
1697    
1698          new_free(re);
1699          if (extra != NULL) new_free(extra);
1700          if (tables != NULL) new_free((void *)tables);
1701          continue;  /* With next regex */
1702        }        }
1703      }      }        /* End of non-POSIX compile */
1704    
1705    /* Read data lines and test them */    /* Read data lines and test them */
1706    
1707    for (;;)    for (;;)
1708      {      {
1709      unsigned char *q;      uschar *q;
1710      unsigned char *bptr = dbuffer;      uschar *bptr;
1711      int *use_offsets = offsets;      int *use_offsets = offsets;
1712      int use_size_offsets = size_offsets;      int use_size_offsets = size_offsets;
1713      int callout_data = 0;      int callout_data = 0;
1714      int callout_data_set = 0;      int callout_data_set = 0;
1715      int count, c;      int count, c;
1716      int copystrings = 0;      int copystrings = 0;
1717      int find_match_limit = 0;      int find_match_limit = default_find_match_limit;
1718      int getstrings = 0;      int getstrings = 0;
1719      int getlist = 0;      int getlist = 0;
1720      int gmatched = 0;      int gmatched = 0;
1721      int start_offset = 0;      int start_offset = 0;
1722      int g_notempty = 0;      int g_notempty = 0;
1723        int use_dfa = 0;
1724    
1725      options = 0;      options = 0;
1726    
1727        *copynames = 0;
1728        *getnames = 0;
1729    
1730        copynamesptr = copynames;
1731        getnamesptr = getnames;
1732    
1733      pcre_callout = callout;      pcre_callout = callout;
1734      first_callout = 1;      first_callout = 1;
1735      callout_extra = 0;      callout_extra = 0;
1736      callout_count = 0;      callout_count = 0;
1737      callout_fail_count = 999999;      callout_fail_count = 999999;
1738      callout_fail_id = -1;      callout_fail_id = -1;
1739        show_malloc = 0;
1740    
1741        if (extra != NULL) extra->flags &=
1742          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1743    
1744      if (infile == stdin) printf("data> ");      len = 0;
1745      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      for (;;)
1746        {        {
1747        done = 1;        if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1748        goto CONTINUE;          {
1749            if (len > 0) break;
1750            done = 1;
1751            goto CONTINUE;
1752            }
1753          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1754          len = (int)strlen((char *)buffer);
1755          if (buffer[len-1] == '\n') break;
1756        }        }
     if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);  
1757    
     len = (int)strlen((char *)buffer);  
1758      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1759      buffer[len] = 0;      buffer[len] = 0;
1760      if (len == 0) break;      if (len == 0) break;
# Line 937  while (!done) Line 1762  while (!done)
1762      p = buffer;      p = buffer;
1763      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1764    
1765      q = dbuffer;      bptr = q = dbuffer;
1766      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1767        {        {
1768        int i = 0;        int i = 0;
# Line 959  while (!done) Line 1784  while (!done)
1784          c -= '0';          c -= '0';
1785          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1786            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1787    
1788    #if !defined NOUTF8
1789            if (use_utf8 && c > 255)
1790              {
1791              unsigned char buff8[8];
1792              int ii, utn;
1793              utn = ord2utf8(c, buff8);
1794              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1795              c = buff8[ii];   /* Last byte */
1796              }
1797    #endif
1798          break;          break;
1799    
1800          case 'x':          case 'x':
1801    
1802          /* Handle \x{..} specially - new Perl thing for utf8 */          /* Handle \x{..} specially - new Perl thing for utf8 */
1803    
1804    #if !defined NOUTF8
1805          if (*p == '{')          if (*p == '{')
1806            {            {
1807            unsigned char *pt = p;            unsigned char *pt = p;
# Line 973  while (!done) Line 1810  while (!done)
1810              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1811            if (*pt == '}')            if (*pt == '}')
1812              {              {
1813              unsigned char buffer[8];              unsigned char buff8[8];
1814              int ii, utn;              int ii, utn;
1815              utn = ord2utf8(c, buffer);              if (use_utf8)
1816              for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];                {
1817              c = buffer[ii];   /* Last byte */                utn = ord2utf8(c, buff8);
1818                  for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1819                  c = buff8[ii];   /* Last byte */
1820                  }
1821                else
1822                 {
1823                 if (c > 255)
1824                   fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1825                     "UTF-8 mode is not enabled.\n"
1826                     "** Truncation will probably give the wrong result.\n", c);
1827                 }
1828              p = pt + 1;              p = pt + 1;
1829              break;              break;
1830              }              }
1831            /* Not correct form; fall through */            /* Not correct form; fall through */
1832            }            }
1833    #endif
1834    
1835          /* Ordinary \x */          /* Ordinary \x */
1836    
# Line 994  while (!done) Line 1842  while (!done)
1842            }            }
1843          break;          break;
1844    
1845          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1846          p--;          p--;
1847          continue;          continue;
1848    
1849            case '>':
1850            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1851            continue;
1852    
1853          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1854          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1855          continue;          continue;
# Line 1014  while (!done) Line 1866  while (!done)
1866            }            }
1867          else if (isalnum(*p))          else if (isalnum(*p))
1868            {            {
1869            uschar name[256];            uschar *npp = copynamesptr;
1870            uschar *pp = name;            while (isalnum(*p)) *npp++ = *p++;
1871            while (isalnum(*p)) *pp++ = *p++;            *npp++ = 0;
1872            *pp = 0;            *npp = 0;
1873            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)copynamesptr);
1874            if (n < 0)            if (n < 0)
1875              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1876            else copystrings |= 1 << n;            copynamesptr = npp;
1877            }            }
1878          else if (*p == '+')          else if (*p == '+')
1879            {            {
# Line 1059  while (!done) Line 1911  while (!done)
1911            }            }
1912          continue;          continue;
1913    
1914    #if !defined NODFA
1915            case 'D':
1916    #if !defined NOPOSIX
1917            if (posix || do_posix)
1918              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1919            else
1920    #endif
1921              use_dfa = 1;
1922            continue;
1923    
1924            case 'F':
1925            options |= PCRE_DFA_SHORTEST;
1926            continue;
1927    #endif
1928    
1929          case 'G':          case 'G':
1930          if (isdigit(*p))          if (isdigit(*p))
1931            {            {
# Line 1067  while (!done) Line 1934  while (!done)
1934            }            }
1935          else if (isalnum(*p))          else if (isalnum(*p))
1936            {            {
1937            uschar name[256];            uschar *npp = getnamesptr;
1938            uschar *pp = name;            while (isalnum(*p)) *npp++ = *p++;
1939            while (isalnum(*p)) *pp++ = *p++;            *npp++ = 0;
1940            *pp = 0;            *npp = 0;
1941            n = pcre_get_stringnumber(re, (char *)name);            n = pcre_get_stringnumber(re, (char *)getnamesptr);
1942            if (n < 0)            if (n < 0)
1943              fprintf(outfile, "no parentheses with name \"%s\"\n", name);              fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1944            else getstrings |= 1 << n;            getnamesptr = npp;
1945            }            }
1946          continue;          continue;
1947    
# Line 1096  while (!done) Line 1963  while (!done)
1963            {            {
1964            size_offsets_max = n;            size_offsets_max = n;
1965            free(offsets);            free(offsets);
1966            use_offsets = offsets = malloc(size_offsets_max * sizeof(int));            use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1967            if (offsets == NULL)            if (offsets == NULL)
1968              {              {
1969              printf("** Failed to get %d bytes of memory for offsets vector\n",              printf("** Failed to get %d bytes of memory for offsets vector\n",
1970                size_offsets_max * sizeof(int));                (int)(size_offsets_max * sizeof(int)));
1971              return 1;              yield = 1;
1972                goto EXIT;
1973              }              }
1974            }            }
1975          use_size_offsets = n;          use_size_offsets = n;
1976          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */          if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1977          continue;          continue;
1978    
1979            case 'P':
1980            options |= PCRE_PARTIAL;
1981            continue;
1982    
1983            case 'Q':
1984            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1985            if (extra == NULL)
1986              {
1987              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1988              extra->flags = 0;
1989              }
1990            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1991            extra->match_limit_recursion = n;
1992            continue;
1993    
1994            case 'q':
1995            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1996            if (extra == NULL)
1997              {
1998              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1999              extra->flags = 0;
2000              }
2001            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2002            extra->match_limit = n;
2003            continue;
2004    
2005    #if !defined NODFA
2006            case 'R':
2007            options |= PCRE_DFA_RESTART;
2008            continue;
2009    #endif
2010    
2011            case 'S':
2012            show_malloc = 1;
2013            continue;
2014    
2015            case 'Y':
2016            options |= PCRE_NO_START_OPTIMIZE;
2017            continue;
2018    
2019          case 'Z':          case 'Z':
2020          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
2021          continue;          continue;
2022    
2023            case '?':
2024            options |= PCRE_NO_UTF8_CHECK;
2025            continue;
2026    
2027            case '<':
2028              {
2029              int x = check_newline(p, outfile);
2030              if (x == 0) goto NEXT_DATA;
2031              options |= x;
2032              while (*p++ != '>');
2033              }
2034            continue;
2035          }          }
2036        *q++ = c;        *q++ = c;
2037        }        }
2038      *q = 0;      *q = 0;
2039      len = q - dbuffer;      len = q - dbuffer;
2040    
2041        /* Move the data to the end of the buffer so that a read over the end of
2042        the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2043        we are using the POSIX interface, we must include the terminating zero. */
2044    
2045    #if !defined NOPOSIX
2046        if (posix || do_posix)
2047          {
2048          memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2049          bptr += buffer_size - len - 1;
2050          }
2051        else
2052    #endif
2053          {
2054          memmove(bptr + buffer_size - len, bptr, len);
2055          bptr += buffer_size - len;
2056          }
2057    
2058        if ((all_use_dfa || use_dfa) && find_match_limit)
2059          {
2060          printf("**Match limit not relevant for DFA matching: ignored\n");
2061          find_match_limit = 0;
2062          }
2063    
2064      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
2065      support timing or playing with the match limit or callout data. */      support timing or playing with the match limit or callout data. */
2066    
# Line 1127  while (!done) Line 2071  while (!done)
2071        int eflags = 0;        int eflags = 0;
2072        regmatch_t *pmatch = NULL;        regmatch_t *pmatch = NULL;
2073        if (use_size_offsets > 0)        if (use_size_offsets > 0)
2074          pmatch = malloc(sizeof(regmatch_t) * use_size_offsets);          pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2075        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2076        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2077          if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2078    
2079        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2080    
2081        if (rc != 0)        if (rc != 0)
2082          {          {
2083          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2084          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2085          }          }
2086          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2087                  != 0)
2088            {
2089            fprintf(outfile, "Matched with REG_NOSUB\n");
2090            }
2091        else        else
2092          {          {
2093          size_t i;          size_t i;
# Line 1169  while (!done) Line 2119  while (!done)
2119    
2120      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
2121        {        {
2122        if (timeit)        if (timeitm > 0)
2123          {          {
2124          register int i;          register int i;
2125          clock_t time_taken;          clock_t time_taken;
2126          clock_t start_time = clock();          clock_t start_time = clock();
2127          for (i = 0; i < LOOPREPEAT; i++)  
2128    #if !defined NODFA
2129            if (all_use_dfa || use_dfa)
2130              {
2131              int workspace[1000];
2132              for (i = 0; i < timeitm; i++)
2133                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2134                  options | g_notempty, use_offsets, use_size_offsets, workspace,
2135                  sizeof(workspace)/sizeof(int));
2136              }
2137            else
2138    #endif
2139    
2140            for (i = 0; i < timeitm; i++)
2141            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
2142              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
2143    
2144          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2145          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
2146            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /            (((double)time_taken * 1000.0) / (double)timeitm) /
2147              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2148          }          }
2149    
2150        /* If find_match_limit is set, we want to do repeated matches with        /* If find_match_limit is set, we want to do repeated matches with
2151        varying limits in order to find the minimum value. */        varying limits in order to find the minimum value for the match limit and
2152          for the recursion limit. */
2153    
2154        if (find_match_limit)        if (find_match_limit)
2155          {          {
         int min = 0;  
         int mid = 64;  
         int max = -1;  
   
2156          if (extra == NULL)          if (extra == NULL)
2157            {            {
2158            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2159            extra->flags = 0;            extra->flags = 0;
2160            }            }
         extra->flags |= PCRE_EXTRA_MATCH_LIMIT;  
   
         for (;;)  
           {  
           extra->match_limit = mid;  
           count = pcre_exec(re, extra, (char *)bptr, len, start_offset,  
             options | g_notempty, use_offsets, use_size_offsets);  
           if (count == PCRE_ERROR_MATCHLIMIT)  
             {  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             min = mid;  
             mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;  
             }  
           else if (count >= 0 || count == PCRE_ERROR_NOMATCH)  
             {  
             if (mid == min + 1)  
               {  
               fprintf(outfile, "Minimum match limit = %d\n", mid);  
               break;  
               }  
             /* fprintf(outfile, "Testing match limit = %d\n", mid); */  
             max = mid;  
             mid = (min + mid)/2;  
             }  
           else break;    /* Some other error */  
           }  
2161    
2162          extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;          (void)check_match_limit(re, extra, bptr, len, start_offset,
2163              options|g_notempty, use_offsets, use_size_offsets,
2164              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2165              PCRE_ERROR_MATCHLIMIT, "match()");
2166    
2167            count = check_match_limit(re, extra, bptr, len, start_offset,
2168              options|g_notempty, use_offsets, use_size_offsets,
2169              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2170              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2171          }          }
2172    
2173        /* If callout_data is set, use the interface with additional data */        /* If callout_data is set, use the interface with additional data */
# Line 1233  while (!done) Line 2176  while (!done)
2176          {          {
2177          if (extra == NULL)          if (extra == NULL)
2178            {            {
2179            extra = malloc(sizeof(pcre_extra));            extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2180            extra->flags = 0;            extra->flags = 0;
2181            }            }
2182          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2183          extra->callout_data = (void *)callout_data;          extra->callout_data = &callout_data;
2184          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2185            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
2186          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
# Line 1246  while (!done) Line 2189  while (!done)
2189        /* The normal case is just to do the match once, with the default        /* The normal case is just to do the match once, with the default
2190        value of match_limit. */        value of match_limit. */
2191    
2192        else count = pcre_exec(re, extra, (char *)bptr, len,  #if !defined NODFA
2193          start_offset, options | g_notempty, use_offsets, use_size_offsets);        else if (all_use_dfa || use_dfa)
2194            {
2195            int workspace[1000];
2196            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2197              options | g_notempty, use_offsets, use_size_offsets, workspace,
2198              sizeof(workspace)/sizeof(int));
2199            if (count == 0)
2200              {
2201              fprintf(outfile, "Matched, but too many subsidiary matches\n");
2202              count = use_size_offsets/2;
2203              }
2204            }
2205    #endif
2206    
2207        if (count == 0)        else
2208          {          {
2209          fprintf(outfile, "Matched, but too many substrings\n");          count = pcre_exec(re, extra, (char *)bptr, len,
2210          count = use_size_offsets/3;            start_offset, options | g_notempty, use_offsets, use_size_offsets);
2211            if (count == 0)
2212              {
2213              fprintf(outfile, "Matched, but too many substrings\n");
2214              count = use_size_offsets/3;
2215              }
2216          }          }
2217    
2218        /* Matched */        /* Matched */
2219    
2220        if (count >= 0)        if (count >= 0)
2221          {          {
2222          int i;          int i, maxcount;
2223    
2224    #if !defined NODFA
2225            if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2226    #endif
2227              maxcount = use_size_offsets/3;
2228    
2229            /* This is a check against a lunatic return value. */
2230    
2231            if (count > maxcount)
2232              {
2233              fprintf(outfile,
2234                "** PCRE error: returned count %d is too big for offset size %d\n",
2235                count, use_size_offsets);
2236              count = use_size_offsets/3;
2237              if (do_g || do_G)
2238                {
2239                fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2240                do_g = do_G = FALSE;        /* Break g/G loop */
2241                }
2242              }
2243    
2244          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
2245            {            {
2246            if (use_offsets[i] < 0)            if (use_offsets[i] < 0)
# Line 1287  while (!done) Line 2268  while (!done)
2268            {            {
2269            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
2270              {              {
2271              char copybuffer[16];              char copybuffer[256];
2272              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2273                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
2274              if (rc < 0)              if (rc < 0)
# Line 1297  while (!done) Line 2278  while (!done)
2278              }              }
2279            }            }
2280    
2281            for (copynamesptr = copynames;
2282                 *copynamesptr != 0;
2283                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2284              {
2285              char copybuffer[256];
2286              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2287                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2288              if (rc < 0)
2289                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2290              else
2291                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2292              }
2293    
2294          for (i = 0; i < 32; i++)          for (i = 0; i < 32; i++)
2295            {            {
2296            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
# Line 1309  while (!done) Line 2303  while (!done)
2303              else              else
2304                {                {
2305                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
               /* free((void *)substring); */  
2306                pcre_free_substring(substring);                pcre_free_substring(substring);
2307                }                }
2308              }              }
2309            }            }
2310    
2311            for (getnamesptr = getnames;
2312                 *getnamesptr != 0;
2313                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2314              {
2315              const char *substring;
2316              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2317                count, (char *)getnamesptr, &substring);
2318              if (rc < 0)
2319                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2320              else
2321                {
2322                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2323                pcre_free_substring(substring);
2324                }
2325              }
2326    
2327          if (getlist)          if (getlist)
2328            {            {
2329            const char **stringlist;            const char **stringlist;
# Line 1334  while (!done) Line 2343  while (!done)
2343            }            }
2344          }          }
2345    
2346          /* There was a partial match */
2347    
2348          else if (count == PCRE_ERROR_PARTIAL)
2349            {
2350            fprintf(outfile, "Partial match");
2351    #if !defined NODFA
2352            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2353              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2354                bptr + use_offsets[0]);
2355    #endif
2356            fprintf(outfile, "\n");
2357            break;  /* Out of the /g loop */
2358            }
2359    
2360        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
2361        g_notempty after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end. We want
2362        We want to advance the start offset, and continue. Fudge the offset        to advance the start offset, and continue. We won't be at the end of the
2363        values to achieve this. We won't be at the end of the string - that        string - that was checked before setting g_notempty.
2364        was checked before setting g_notempty. */  
2365          Complication arises in the case when the newline option is "any" or
2366          "anycrlf". If the previous match was at the end of a line terminated by
2367          CRLF, an advance of one character just passes the \r, whereas we should
2368          prefer the longer newline sequence, as does the code in pcre_exec().
2369          Fudge the offset value to achieve this.
2370    
2371          Otherwise, in the case of UTF-8 matching, the advance must be one
2372          character, not one byte. */
2373    
2374        else        else
2375          {          {
2376          if (g_notempty != 0)          if (g_notempty != 0)
2377            {            {
2378              int onechar = 1;
2379              unsigned int obits = ((real_pcre *)re)->options;
2380            use_offsets[0] = start_offset;            use_offsets[0] = start_offset;
2381            use_offsets[1] = start_offset + 1;            if ((obits & PCRE_NEWLINE_BITS) == 0)
2382                {
2383                int d;
2384                (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2385                /* Note that these values are always the ASCII ones, even in
2386                EBCDIC environments. CR = 13, NL = 10. */
2387                obits = (d == 13)? PCRE_NEWLINE_CR :
2388                        (d == 10)? PCRE_NEWLINE_LF :
2389                        (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2390                        (d == -2)? PCRE_NEWLINE_ANYCRLF :
2391                        (d == -1)? PCRE_NEWLINE_ANY : 0;
2392                }
2393              if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2394                   (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2395                  &&
2396                  start_offset < len - 1 &&
2397                  bptr[start_offset] == '\r' &&
2398                  bptr[start_offset+1] == '\n')
2399                onechar++;
2400              else if (use_utf8)
2401                {
2402                while (start_offset + onechar < len)
2403                  {
2404                  int tb = bptr[start_offset+onechar];
2405                  if (tb <= 127) break;
2406                  tb &= 0xc0;
2407                  if (tb != 0 && tb != 0xc0) onechar++;
2408                  }
2409                }
2410              use_offsets[1] = start_offset + onechar;
2411            }            }
2412          else          else
2413            {            {
2414            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
2415              {              {
2416              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
2417              }              }
2418              else fprintf(outfile, "Error %d\n", count);
2419            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
2420            }            }
2421          }          }
# Line 1370  while (!done) Line 2432  while (!done)
2432        character. */        character. */
2433    
2434        g_notempty = 0;        g_notempty = 0;
2435    
2436        if (use_offsets[0] == use_offsets[1])        if (use_offsets[0] == use_offsets[1])
2437          {          {
2438          if (use_offsets[0] == len) break;          if (use_offsets[0] == len) break;
# Line 1388  while (!done) Line 2451  while (!done)
2451          len -= use_offsets[1];          len -= use_offsets[1];
2452          }          }
2453        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
2454    
2455        NEXT_DATA: continue;
2456      }    /* End of loop for data lines */      }    /* End of loop for data lines */
2457    
2458    CONTINUE:    CONTINUE:
# Line 1396  while (!done) Line 2461  while (!done)
2461    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2462  #endif  #endif
2463    
2464    if (re != NULL) free(re);    if (re != NULL) new_free(re);
2465    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
2466    if (tables != NULL)    if (tables != NULL)
2467      {      {
2468      free((void *)tables);      new_free((void *)tables);
2469      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
2470        locale_set = 0;
2471      }      }
2472    }    }
2473    
2474  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
2475  return 0;  
2476    EXIT:
2477    
2478    if (infile != NULL && infile != stdin) fclose(infile);
2479    if (outfile != NULL && outfile != stdout) fclose(outfile);
2480    
2481    free(buffer);
2482    free(dbuffer);
2483    free(pbuffer);
2484    free(offsets);
2485    
2486    return yield;
2487  }  }
2488    
2489  /* End */  /* End of pcretest.c */

Legend:
Removed from v.65  
changed lines
  Added in v.392

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12