/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 43 by nigel, Sat Feb 24 21:39:21 2007 UTC revision 75 by nigel, Sat Feb 24 21:40:37 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
46    
47  /* Use the internal info for displaying the results of pcre_study(). */  /* We need the internal info for displaying the results of pcre_study(). Also
48    for getting the opcodes for showing compiled code. */
49    
50    #define PCRE_SPY        /* For Win32 build, import data, not export */
51  #include "internal.h"  #include "internal.h"
52    
53  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
# Line 29  Makefile. */ Line 66  Makefile. */
66  #endif  #endif
67  #endif  #endif
68    
69  #define LOOPREPEAT 20000  #define LOOPREPEAT 500000
70    
71    #define BUFFER_SIZE 30000
72    #define PBUFFER_SIZE BUFFER_SIZE
73    #define DBUFFER_SIZE BUFFER_SIZE
74    
75    
76  static FILE *outfile;  static FILE *outfile;
77  static int log_store = 0;  static int log_store = 0;
78    static int callout_count;
79    static int callout_extra;
80    static int callout_fail_count;
81    static int callout_fail_id;
82    static int first_callout;
83    static int show_malloc;
84    static int use_utf8;
85  static size_t gotten_store;  static size_t gotten_store;
86    
87    static uschar *pbuffer = NULL;
88    
89    
90    static const int utf8_table1[] = {
91      0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};
92    
93    static const int utf8_table2[] = {
94      0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
95    
96    static const int utf8_table3[] = {
97      0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
98    
99    
100    
101    /*************************************************
102    *         Print compiled regex                   *
103    *************************************************/
104    
105    /* The code for doing this is held in a separate file that is also included in
106    pcre.c when it is compiled with the debug switch. It defines a function called
107    print_internals(), which uses a table of opcode lengths defined by the macro
108    OP_LENGTHS, whose name must be OP_lengths. It also uses a table that translates
109    Unicode property names to numbers; this is kept in a separate file. */
110    
111    static uschar OP_lengths[] = { OP_LENGTHS };
112    
113    #include "ucp.h"
114    #include "ucptypetable.c"
115    #include "printint.c"
116    
117    
118    
/*************************************************
*          Read number from string               *
*************************************************/

/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking the -o argument, so just keep it simple.

Leading white space is skipped and then decimal digits are accumulated.
Conversion stops at the first non-digit, or before a digit that would take the
result past INT_MAX. In the latter case *endptr is left pointing at that digit,
so a caller that insists on *endptr == 0 sees the argument as malformed rather
than receiving a wrapped-around value.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer

Returns:        the converted value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = *str - '0';
  if (result > (INT_MAX - digit) / 10) break;   /* next step would overflow */
  result = result * 10 + digit;
  str++;
  }
*endptr = str;
return result;
}
143    
144    
145    
146  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
147  code as contained in pcre.c under the DEBUG macro. */  *       Convert character value to UTF-8         *
148    *************************************************/
149    
150  static const char *OP_names[] = {  /* This function takes an integer value in the range 0 - 0x7fffffff
151    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  and encodes it as a UTF-8 character in 0 to 6 bytes.
   "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  
   "Opt", "^", "$", "Any", "chars", "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref", "Recurse",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
152    
153    Arguments:
154      cvalue     the character value
155      buffer     pointer to buffer for result - at least 6 bytes long
156    
157    Returns:     number of characters placed in the buffer
158                 -1 if input character is negative
159                 0 if input character is positive but too big (only when
160                 int is longer than 32 bits)
161    */
162    
163  static void print_internals(pcre *re)  static int
164    ord2utf8(int cvalue, unsigned char *buffer)
165  {  {
166  unsigned char *code = ((real_pcre *)re)->code;  register int i, j;
167    for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++)
168      if (cvalue <= utf8_table1[i]) break;
169    if (i >= sizeof(utf8_table1)/sizeof(int)) return 0;
170    if (cvalue < 0) return -1;
171    
172    buffer += i;
173    for (j = i; j > 0; j--)
174     {
175     *buffer-- = 0x80 | (cvalue & 0x3f);
176     cvalue >>= 6;
177     }
178    *buffer = utf8_table2[i] | cvalue;
179    return i + 1;
180    }
181    
182    
183    /*************************************************
184    *            Convert UTF-8 string to value       *
185    *************************************************/
186    
187  fprintf(outfile, "------------------------------------------------------------------\n");  /* This function takes one or more bytes that represents a UTF-8 character,
188    and returns the value of the character.
189    
190  for(;;)  Argument:
191      buffer   a pointer to the byte vector
192      vptr     a pointer to an int to receive the value
193    
194    Returns:   >  0 => the number of bytes consumed
195               -6 to 0 => malformed UTF-8 character at offset = (-return)
196    */
197    
198    static int
199    utf82ord(unsigned char *buffer, int *vptr)
200    {
201    int c = *buffer++;
202    int d = c;
203    int i, j, s;
204    
205    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
206    {    {
207    int c;    if ((d & 0x80) == 0) break;
208    int charlength;    d <<= 1;
209      }
210    
211    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
212    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
213    
214    if (*code >= OP_BRA)  /* i now has a value in the range 1-5 */
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
215    
216    else switch(*code)  s = 6*i;
217      {  d = (c & utf8_table3[i]) << s;
     case OP_END:  
     fprintf(outfile, "    %s\n", OP_names[*code]);  
     fprintf(outfile, "------------------------------------------------------------------\n");  
     return;  
   
     case OP_OPT:  
     fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
218    
219      case OP_CLASS:  for (j = 0; j < i; j++)
220        {    {
221        int i, min, max;    c = *buffer++;
222        code++;    if ((c & 0xc0) != 0x80) return -(j+1);
223        fprintf(outfile, "    [");    s -= 6;
224      d |= (c & 0x3f) << s;
225        for (i = 0; i < 256; i++)    }
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
226    
227        CLASS_REF_REPEAT:  /* Check that encoding was the correct unique one */
228    
229        switch(*code)  for (j = 0; j < sizeof(utf8_table1)/sizeof(int); j++)
230          {    if (d <= utf8_table1[j]) break;
231          case OP_CRSTAR:  if (j != i) return -(i+1);
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
232    
233          case OP_CRRANGE:  /* Valid value */
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
234    
235          default:  *vptr = d;
236          code--;  return i+1;
237    }
238    
239    
240    
241    /*************************************************
242    *             Print character string             *
243    *************************************************/
244    
245    /* Character string printing function. Must handle UTF-8 strings in utf8
246    mode. Yields number of characters printed. If handed a NULL file, just counts
247    chars without printing. */
248    
249    static int pchars(unsigned char *p, int length, FILE *f)
250    {
251    int c;
252    int yield = 0;
253    
254    while (length-- > 0)
255      {
256      if (use_utf8)
257        {
258        int rc = utf82ord(p, &c);
259    
260        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
261          {
262          length -= rc - 1;
263          p += rc;
264          if (c < 256 && isprint(c))
265            {
266            if (f != NULL) fprintf(f, "%c", c);
267            yield++;
268          }          }
269          else
270            {
271            int n;
272            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
273            yield += n;
274            }
275          continue;
276        }        }
277      break;      }
278    
279      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
280    
281      default:    if (isprint(c = *(p++)))
282      fprintf(outfile, "    %s", OP_names[*code]);      {
283      break;      if (f != NULL) fprintf(f, "%c", c);
284        yield++;
285        }
286      else
287        {
288        if (f != NULL) fprintf(f, "\\x%02x", c);
289        yield += 4;
290      }      }
   
   code++;  
   fprintf(outfile, "\n");  
291    }    }
292    
293    return yield;
294  }  }
295    
296    
297    
298  /* Character string printing function. */  /*************************************************
299    *              Callout function                  *
300    *************************************************/
301    
302    /* Called from PCRE as a result of the (?C) item. We print out where we are in
303    the match. Yield zero unless more callouts than the fail count, or the callout
304    data is not zero. */
305    
306  static void pchars(unsigned char *p, int length)  static int callout(pcre_callout_block *cb)
307  {  {
308  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
309  while (length-- > 0)  int i, pre_start, post_start, subject_length;
310    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
311      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
312      {
313      fprintf(f, "Callout %d: last capture = %d\n",
314        cb->callout_number, cb->capture_last);
315    
316      for (i = 0; i < cb->capture_top * 2; i += 2)
317        {
318        if (cb->offset_vector[i] < 0)
319          fprintf(f, "%2d: <unset>\n", i/2);
320        else
321          {
322          fprintf(f, "%2d: ", i/2);
323          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
324            cb->offset_vector[i+1] - cb->offset_vector[i], f);
325          fprintf(f, "\n");
326          }
327        }
328      }
329    
330    /* Re-print the subject in canonical form, the first time or if giving full
331    datails. On subsequent calls in the same match, we use pchars just to find the
332    printed lengths of the substrings. */
333    
334    if (f != NULL) fprintf(f, "--->");
335    
336    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
337    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
338      cb->current_position - cb->start_match, f);
339    
340    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
341    
342    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
343      cb->subject_length - cb->current_position, f);
344    
345    if (f != NULL) fprintf(f, "\n");
346    
347    /* Always print appropriate indicators, with callout number if not already
348    shown. For automatic callouts, show the pattern offset. */
349    
350    if (cb->callout_number == 255)
351      {
352      fprintf(outfile, "%+3d ", cb->pattern_position);
353      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
354      }
355    else
356      {
357      if (callout_extra) fprintf(outfile, "    ");
358        else fprintf(outfile, "%3d ", cb->callout_number);
359      }
360    
361    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
362    fprintf(outfile, "^");
363    
364    if (post_start > 0)
365      {
366      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
367      fprintf(outfile, "^");
368      }
369    
370    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
371      fprintf(outfile, " ");
372    
373    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
374      pbuffer + cb->pattern_position);
375    
376    fprintf(outfile, "\n");
377    first_callout = 0;
378    
379    if (cb->callout_data != NULL)
380      {
381      int callout_data = *((int *)(cb->callout_data));
382      if (callout_data != 0)
383        {
384        fprintf(outfile, "Callout data = %d\n", callout_data);
385        return callout_data;
386        }
387      }
388    
389    return (cb->callout_number != callout_fail_id)? 0 :
390           (++callout_count >= callout_fail_count)? 1 : 0;
391  }  }
392    
393    
394    /*************************************************
395    *            Local malloc functions              *
396    *************************************************/
397    
398  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
399  compiled re. */  compiled re. */
400    
401  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
402  {  {
403    void *block = malloc(size);
404  gotten_store = size;  gotten_store = size;
405  if (log_store)  if (show_malloc)
406    fprintf(outfile, "Memory allocation (code space): %d\n",    fprintf(outfile, "malloc       %3d %p\n", size, block);
407      (int)((int)size - offsetof(real_pcre, code[0])));  return block;
 return malloc(size);  
408  }  }
409    
410    static void new_free(void *block)
411    {
412    if (show_malloc)
413      fprintf(outfile, "free             %p\n", block);
414    free(block);
415    }
416    
417    
418    /* For recursion malloc/free, to test stacking calls */
419    
420    static void *stack_malloc(size_t size)
421    {
422    void *block = malloc(size);
423    if (show_malloc)
424      fprintf(outfile, "stack_malloc %3d %p\n", size, block);
425    return block;
426    }
427    
428    static void stack_free(void *block)
429    {
430    if (show_malloc)
431      fprintf(outfile, "stack_free       %p\n", block);
432    free(block);
433    }
434    
435    
436    /*************************************************
437    *          Call pcre_fullinfo()                  *
438    *************************************************/
439    
440  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function */
441    
# Line 303  if ((rc = pcre_fullinfo(re, study, optio Line 448  if ((rc = pcre_fullinfo(re, study, optio
448    
449    
450    
451    /*************************************************
452    *         Byte flipping function                 *
453    *************************************************/
454    
/* Reverse the byte order of a 2-byte or 4-byte quantity. Only the low-order
two (n == 2) or four (any other n) bytes of value take part; higher bits are
dropped. The work is done in unsigned arithmetic so that moving a byte into
the top position cannot overflow a signed long on systems where long is 32
bits wide.

Arguments:
  value     the quantity to flip
  n         2 for a 2-byte flip; anything else gives a 4-byte flip

Returns:    the flipped value
*/

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;
if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));
return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
464    
465    
466    
467    
468    /*************************************************
469    *                Main Program                    *
470    *************************************************/
471    
472  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
473  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 317  int op = 1; Line 482  int op = 1;
482  int timeit = 0;  int timeit = 0;
483  int showinfo = 0;  int showinfo = 0;
484  int showstore = 0;  int showstore = 0;
485    int size_offsets = 45;
486    int size_offsets_max;
487    int *offsets;
488    #if !defined NOPOSIX
489  int posix = 0;  int posix = 0;
490    #endif
491  int debug = 0;  int debug = 0;
492  int done = 0;  int done = 0;
 unsigned char buffer[30000];  
 unsigned char dbuffer[1024];  
493    
494  /* Static so that new_malloc can use it. */  unsigned char *buffer;
495    unsigned char *dbuffer;
496    
497    /* Get buffers from malloc() so that Electric Fence will check their misuse
498    when I am debugging. */
499    
500    buffer = (unsigned char *)malloc(BUFFER_SIZE);
501    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
502    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
503    
504    /* The outfile variable is static so that new_malloc can use it. The _setmode()
505    stuff is some magic that I don't understand, but which apparently does good
506    things in Windows. It's related to line terminations.  */
507    
508    #if defined(_WIN32) || defined(WIN32)
509    _setmode( _fileno( stdout ), 0x8000 );
510    #endif  /* defined(_WIN32) || defined(WIN32) */
511    
512  outfile = stdout;  outfile = stdout;
513    
# Line 331  outfile = stdout; Line 515  outfile = stdout;
515    
516  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
517    {    {
518      unsigned char *endptr;
519    
520    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
521      showstore = 1;      showstore = 1;
522    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
523    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
524    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
525      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
526          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
527            *endptr == 0))
528        {
529        op++;
530        argc--;
531        }
532    #if !defined NOPOSIX
533    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
534    #endif
535      else if (strcmp(argv[op], "-C") == 0)
536        {
537        int rc;
538        printf("PCRE version %s\n", pcre_version());
539        printf("Compiled with\n");
540        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
541        printf("  %sUTF-8 support\n", rc? "" : "No ");
542        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
543        printf("  %sUnicode properties support\n", rc? "" : "No ");
544        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
545        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
546        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
547        printf("  Internal link size = %d\n", rc);
548        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
549        printf("  POSIX malloc threshold = %d\n", rc);
550        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
551        printf("  Default match limit = %d\n", rc);
552        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
553        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
554        exit(0);
555        }
556    else    else
557      {      {
558      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
559      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
560      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
561             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n"
562             "  -p   use POSIX interface\n"             "  -i     show information about compiled pattern\n"
563             "  -s   output store information\n"             "  -m     output memory used information\n"
564             "  -t   time compilation and execution\n");             "  -o <n> set size of offsets vector to <n>\n");
565    #if !defined NOPOSIX
566        printf("  -p     use POSIX interface\n");
567    #endif
568        printf("  -s     output store (memory) used information\n"
569               "  -t     time compilation and execution\n");
570      return 1;      return 1;
571      }      }
572    op++;    op++;
573    argc--;    argc--;
574    }    }
575    
576    /* Get the store for the offsets vector, and remember what it was */
577    
578    size_offsets_max = size_offsets;
579    offsets = (int *)malloc(size_offsets_max * sizeof(int));
580    if (offsets == NULL)
581      {
582      printf("** Failed to get %d bytes of memory for offsets vector\n",
583        size_offsets_max * sizeof(int));
584      return 1;
585      }
586    
587  /* Sort out the input and output files */  /* Sort out the input and output files */
588    
589  if (argc > 1)  if (argc > 1)
590    {    {
591    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
592    if (infile == NULL)    if (infile == NULL)
593      {      {
594      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
# Line 366  if (argc > 1) Line 598  if (argc > 1)
598    
599  if (argc > 2)  if (argc > 2)
600    {    {
601    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
602    if (outfile == NULL)    if (outfile == NULL)
603      {      {
604      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
# Line 377  if (argc > 2) Line 609  if (argc > 2)
609  /* Set alternative malloc function */  /* Set alternative malloc function */
610    
611  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
612    pcre_free = new_free;
613    pcre_stack_malloc = stack_malloc;
614    pcre_stack_free = stack_free;
615    
616  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
617    
# Line 391  while (!done) Line 626  while (!done)
626    
627  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
628    regex_t preg;    regex_t preg;
629      int do_posix = 0;
630  #endif  #endif
631    
632    const char *error;    const char *error;
633    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
634    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
635      const unsigned char *tables = NULL;
636      unsigned long int true_size, true_study_size = 0;
637      size_t size, regex_gotten_store;
638    int do_study = 0;    int do_study = 0;
639    int do_debug = debug;    int do_debug = debug;
640    int do_G = 0;    int do_G = 0;
641    int do_g = 0;    int do_g = 0;
642    int do_showinfo = showinfo;    int do_showinfo = showinfo;
643    int do_showrest = 0;    int do_showrest = 0;
644    int do_posix = 0;    int do_flip = 0;
645    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
646    
647      use_utf8 = 0;
648    
649    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
650    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
651    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
652      fflush(outfile);
653    
654    p = buffer;    p = buffer;
655    while (isspace(*p)) p++;    while (isspace(*p)) p++;
656    if (*p == 0) continue;    if (*p == 0) continue;
657    
658    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
659    complete, read more. */  
660      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
661        {
662        unsigned long int magic;
663        uschar sbuf[8];
664        FILE *f;
665    
666        p++;
667        pp = p + (int)strlen((char *)p);
668        while (isspace(pp[-1])) pp--;
669        *pp = 0;
670    
671        f = fopen((char *)p, "rb");
672        if (f == NULL)
673          {
674          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
675          continue;
676          }
677    
678        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
679    
680        true_size =
681          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
682        true_study_size =
683          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
684    
685        re = (real_pcre *)new_malloc(true_size);
686        regex_gotten_store = gotten_store;
687    
688        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
689    
690        magic = ((real_pcre *)re)->magic_number;
691        if (magic != MAGIC_NUMBER)
692          {
693          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
694            {
695            do_flip = 1;
696            }
697          else
698            {
699            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
700            fclose(f);
701            continue;
702            }
703          }
704    
705        fprintf(outfile, "Compiled regex%s loaded from %s\n",
706          do_flip? " (byte-inverted)" : "", p);
707    
708        /* Need to know if UTF-8 for printing data strings */
709    
710        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
711        use_utf8 = (options & PCRE_UTF8) != 0;
712    
713        /* Now see if there is any following study data */
714    
715        if (true_study_size != 0)
716          {
717          pcre_study_data *psd;
718    
719          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
720          extra->flags = PCRE_EXTRA_STUDY_DATA;
721    
722          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
723          extra->study_data = psd;
724    
725          if (fread(psd, 1, true_study_size, f) != true_study_size)
726            {
727            FAIL_READ:
728            fprintf(outfile, "Failed to read data from %s\n", p);
729            if (extra != NULL) new_free(extra);
730            if (re != NULL) new_free(re);
731            fclose(f);
732            continue;
733            }
734          fprintf(outfile, "Study data loaded from %s\n", p);
735          do_study = 1;     /* To get the data output if requested */
736          }
737        else fprintf(outfile, "No study data\n");
738    
739        fclose(f);
740        goto SHOW_INFO;
741        }
742    
743      /* In-line pattern (the usual case). Get the delimiter and seek the end of
744      the pattern; if it isn't complete, read more. */
745    
746    delimiter = *p++;    delimiter = *p++;
747    
# Line 436  while (!done) Line 763  while (!done)
763        }        }
764      if (*pp != 0) break;      if (*pp != 0) break;
765    
766      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
767      if (len < 256)      if (len < 256)
768        {        {
769        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 459  while (!done) Line 786  while (!done)
786    
787    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
788    
789    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
790      for callouts. */
791    
792    *pp++ = 0;    *pp++ = 0;
793      strcpy((char *)pbuffer, (char *)p);
794    
795    /* Look for options after final delimiter */    /* Look for options after final delimiter */
796    
# Line 481  while (!done) Line 810  while (!done)
810    
811        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
812        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
813          case 'C': options |= PCRE_AUTO_CALLOUT; break;
814        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
815        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
816          case 'F': do_flip = 1; break;
817        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
818        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
819        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
820          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
821    
822  #if !defined NOPOSIX  #if !defined NOPOSIX
823        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 494  while (!done) Line 826  while (!done)
826        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
827        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
828        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
829          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
830          case '?': options |= PCRE_NO_UTF8_CHECK; break;
831    
832        case 'L':        case 'L':
833        ppp = pp;        ppp = pp;
# Line 508  while (!done) Line 842  while (!done)
842        pp = ppp;        pp = ppp;
843        break;        break;
844    
845          case '>':
846          to_file = pp;
847          while (*pp != 0) pp++;
848          while (isspace(pp[-1])) pp--;
849          *pp = 0;
850          break;
851    
852        case '\n': case ' ': break;        case '\n': case ' ': break;
853    
854        default:        default:
855        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
856        goto SKIP_DATA;        goto SKIP_DATA;
# Line 524  while (!done) Line 866  while (!done)
866      {      {
867      int rc;      int rc;
868      int cflags = 0;      int cflags = 0;
869    
870      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
871      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
872      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
# Line 533  while (!done) Line 876  while (!done)
876    
877      if (rc != 0)      if (rc != 0)
878        {        {
879        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
880        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
881        goto SKIP_DATA;        goto SKIP_DATA;
882        }        }
# Line 557  while (!done) Line 900  while (!done)
900          }          }
901        time_taken = clock() - start_time;        time_taken = clock() - start_time;
902        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
903          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
904          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
905        }        }
906    
907      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 574  while (!done) Line 917  while (!done)
917          {          {
918          for (;;)          for (;;)
919            {            {
920            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
921              {              {
922              done = 1;              done = 1;
923              goto CONTINUE;              goto CONTINUE;
# Line 592  while (!done) Line 935  while (!done)
935      info-returning functions. The old one has a limited interface and      info-returning functions. The old one has a limited interface and
936      returns only limited data. Check that it agrees with the newer one. */      returns only limited data. Check that it agrees with the newer one. */
937    
938        if (log_store)
939          fprintf(outfile, "Memory allocation (code space): %d\n",
940            (int)(gotten_store -
941                  sizeof(real_pcre) -
942                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
943    
944        /* Extract the size for possible writing before possibly flipping it,
945        and remember the store that was got. */
946    
947        true_size = ((real_pcre *)re)->size;
948        regex_gotten_store = gotten_store;
949    
950        /* If /S was present, study the regexp to generate additional info to
951        help with the matching. */
952    
953        if (do_study)
954          {
955          if (timeit)
956            {
957            register int i;
958            clock_t time_taken;
959            clock_t start_time = clock();
960            for (i = 0; i < LOOPREPEAT; i++)
961              extra = pcre_study(re, study_options, &error);
962            time_taken = clock() - start_time;
963            if (extra != NULL) free(extra);
964            fprintf(outfile, "  Study time %.3f milliseconds\n",
965              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
966                (double)CLOCKS_PER_SEC);
967            }
968          extra = pcre_study(re, study_options, &error);
969          if (error != NULL)
970            fprintf(outfile, "Failed to study: %s\n", error);
971          else if (extra != NULL)
972            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
973          }
974    
975        /* If the 'F' option was present, we flip the bytes of all the integer
976        fields in the regex data block and the study block. This is to make it
977        possible to test PCRE's handling of byte-flipped patterns, e.g. those
978        compiled on a different architecture. */
979    
980        if (do_flip)
981          {
982          real_pcre *rre = (real_pcre *)re;
983          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
984          rre->size = byteflip(rre->size, sizeof(rre->size));
985          rre->options = byteflip(rre->options, sizeof(rre->options));
986          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
987          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
988          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
989          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
990          rre->name_table_offset = byteflip(rre->name_table_offset,
991            sizeof(rre->name_table_offset));
992          rre->name_entry_size = byteflip(rre->name_entry_size,
993            sizeof(rre->name_entry_size));
994          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
995    
996          if (extra != NULL)
997            {
998            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
999            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1000            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1001            }
1002          }
1003    
1004        /* Extract information from the compiled data if required */
1005    
1006        SHOW_INFO:
1007    
1008      if (do_showinfo)      if (do_showinfo)
1009        {        {
1010          unsigned long int get_options, all_options;
1011        int old_first_char, old_options, old_count;        int old_first_char, old_options, old_count;
1012        int count, backrefmax, first_char, need_char;        int count, backrefmax, first_char, need_char;
1013        size_t size;        int nameentrysize, namecount;
1014          const uschar *nametable;
1015    
1016        if (do_debug) print_internals(re);        if (do_debug)
1017            {
1018            fprintf(outfile, "------------------------------------------------------------------\n");
1019            print_internals(re, outfile);
1020            }
1021    
1022        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1023        new_info(re, NULL, PCRE_INFO_SIZE, &size);        new_info(re, NULL, PCRE_INFO_SIZE, &size);
1024        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);        new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1025        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);        new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1026        new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);        new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1027        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);        new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1028          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1029          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1030          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1031    
1032        old_count = pcre_info(re, &old_options, &old_first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
1033        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
# Line 620  while (!done) Line 1042  while (!done)
1042            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1043              first_char, old_first_char);              first_char, old_first_char);
1044    
1045          if (old_options != options) fprintf(outfile,          if (old_options != (int)get_options) fprintf(outfile,
1046            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,            "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1047              old_options);              get_options, old_options);
1048          }          }
1049    
1050        if (size != gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
1051          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1052          size, gotten_store);          size, regex_gotten_store);
1053    
1054        fprintf(outfile, "Capturing subpattern count = %d\n", count);        fprintf(outfile, "Capturing subpattern count = %d\n", count);
1055        if (backrefmax > 0)        if (backrefmax > 0)
1056          fprintf(outfile, "Max back reference = %d\n", backrefmax);          fprintf(outfile, "Max back reference = %d\n", backrefmax);
1057        if (options == 0) fprintf(outfile, "No options\n");  
1058          else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",        if (namecount > 0)
1059            ((options & PCRE_ANCHORED) != 0)? " anchored" : "",          {
1060            ((options & PCRE_CASELESS) != 0)? " caseless" : "",          fprintf(outfile, "Named capturing subpatterns:\n");
1061            ((options & PCRE_EXTENDED) != 0)? " extended" : "",          while (namecount-- > 0)
1062            ((options & PCRE_MULTILINE) != 0)? " multiline" : "",            {
1063            ((options & PCRE_DOTALL) != 0)? " dotall" : "",            fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1064            ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",              nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1065            ((options & PCRE_EXTRA) != 0)? " extra" : "",              GET2(nametable, 0));
1066            ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");            nametable += nameentrysize;
1067              }
1068            }
1069    
1070          /* The NOPARTIAL bit is a private bit in the options, so we have
1071          to fish it out via our back door */
1072    
1073          all_options = ((real_pcre *)re)->options;
1074          if (do_flip)
1075            {
1076            all_options = byteflip(all_options, sizeof(all_options));
1077            }
1078    
1079          if ((all_options & PCRE_NOPARTIAL) != 0)
1080            fprintf(outfile, "Partial matching not supported\n");
1081    
1082          if (get_options == 0) fprintf(outfile, "No options\n");
1083            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s\n",
1084              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1085              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1086              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1087              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1088              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1089              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1090              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1091              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1092              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1093              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1094    
1095        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)        if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1096          fprintf(outfile, "Case state changes\n");          fprintf(outfile, "Case state changes\n");
# Line 656  while (!done) Line 1105  while (!done)
1105          }          }
1106        else        else
1107          {          {
1108          if (isprint(first_char))          int ch = first_char & 255;
1109            fprintf(outfile, "First char = \'%c\'\n", first_char);          const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1110              "" : " (caseless)";
1111            if (isprint(ch))
1112              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1113          else          else
1114            fprintf(outfile, "First char = %d\n", first_char);            fprintf(outfile, "First char = %d%s\n", ch, caseless);
1115          }          }
1116    
1117        if (need_char < 0)        if (need_char < 0)
# Line 668  while (!done) Line 1120  while (!done)
1120          }          }
1121        else        else
1122          {          {
1123          if (isprint(need_char))          int ch = need_char & 255;
1124            fprintf(outfile, "Need char = \'%c\'\n", need_char);          const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1125              "" : " (caseless)";
1126            if (isprint(ch))
1127              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1128          else          else
1129            fprintf(outfile, "Need char = %d\n", need_char);            fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1130          }          }
       }  
1131    
1132      /* If /S was present, study the regexp to generate additional info to        /* Don't output study size; at present it is in any case a fixed
1133      help with the matching. */        value, but it varies, depending on the computer architecture, and
1134          so messes up the test suite. (And with the /F option, it might be
1135          flipped.) */
1136    
1137      if (do_study)        if (do_study)
       {  
       if (timeit)  
1138          {          {
1139          register int i;          if (extra == NULL)
1140          clock_t time_taken;            fprintf(outfile, "Study returned NULL\n");
1141          clock_t start_time = clock();          else
1142          for (i = 0; i < LOOPREPEAT; i++)            {
1143            extra = pcre_study(re, study_options, &error);            uschar *start_bits = NULL;
1144          time_taken = clock() - start_time;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1145          if (extra != NULL) free(extra);  
1146          fprintf(outfile, "  Study time %.3f milliseconds\n",            if (start_bits == NULL)
1147            ((double)time_taken * 1000.0)/              fprintf(outfile, "No starting byte set\n");
1148            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            else
1149                {
1150                int i;
1151                int c = 24;
1152                fprintf(outfile, "Starting byte set: ");
1153                for (i = 0; i < 256; i++)
1154                  {
1155                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1156                    {
1157                    if (c > 75)
1158                      {
1159                      fprintf(outfile, "\n  ");
1160                      c = 2;
1161                      }
1162                    if (isprint(i) && i != ' ')
1163                      {
1164                      fprintf(outfile, "%c ", i);
1165                      c += 2;
1166                      }
1167                    else
1168                      {
1169                      fprintf(outfile, "\\x%02x ", i);
1170                      c += 5;
1171                      }
1172                    }
1173                  }
1174                fprintf(outfile, "\n");
1175                }
1176              }
1177          }          }
1178          }
1179    
1180        extra = pcre_study(re, study_options, &error);      /* If the '>' option was present, we write out the regex to a file, and
1181        if (error != NULL)      that is all. The first 8 bytes of the file are the regex length and then
1182          fprintf(outfile, "Failed to study: %s\n", error);      the study length, in big-endian order. */
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
1183    
1184        else if (do_showinfo)      if (to_file != NULL)
1185          {
1186          FILE *f = fopen((char *)to_file, "wb");
1187          if (f == NULL)
1188          {          {
1189          uschar *start_bits = NULL;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1190          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);          }
1191          if (start_bits == NULL)        else
1192            fprintf(outfile, "No starting character set\n");          {
1193            uschar sbuf[8];
1194            sbuf[0] = (true_size >> 24)  & 255;
1195            sbuf[1] = (true_size >> 16)  & 255;
1196            sbuf[2] = (true_size >>  8)  & 255;
1197            sbuf[3] = (true_size)  & 255;
1198    
1199            sbuf[4] = (true_study_size >> 24)  & 255;
1200            sbuf[5] = (true_study_size >> 16)  & 255;
1201            sbuf[6] = (true_study_size >>  8)  & 255;
1202            sbuf[7] = (true_study_size)  & 255;
1203    
1204            if (fwrite(sbuf, 1, 8, f) < 8 ||
1205                fwrite(re, 1, true_size, f) < true_size)
1206              {
1207              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1208              }
1209          else          else
1210            {            {
1211            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1212            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1213              {              {
1214              if ((start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1215                    true_study_size)
1216                {                {
1217                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1218                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1219                }                }
1220                else fprintf(outfile, "Study data written to %s\n", to_file);
1221              }              }
           fprintf(outfile, "\n");  
1222            }            }
1223            fclose(f);
1224          }          }
1225          continue;  /* With next regex */
1226        }        }
1227      }      }        /* End of non-POSIX compile */
1228    
1229    /* Read data lines and test them */    /* Read data lines and test them */
1230    
# Line 744  while (!done) Line 1232  while (!done)
1232      {      {
1233      unsigned char *q;      unsigned char *q;
1234      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
1235        int *use_offsets = offsets;
1236        int use_size_offsets = size_offsets;
1237        int callout_data = 0;
1238        int callout_data_set = 0;
1239      int count, c;      int count, c;
1240      int copystrings = 0;      int copystrings = 0;
1241        int find_match_limit = 0;
1242      int getstrings = 0;      int getstrings = 0;
1243      int getlist = 0;      int getlist = 0;
1244      int gmatched = 0;      int gmatched = 0;
1245      int start_offset = 0;      int start_offset = 0;
1246      int g_notempty = 0;      int g_notempty = 0;
     int offsets[45];  
     int size_offsets = sizeof(offsets)/sizeof(int);  
1247    
1248      options = 0;      options = 0;
1249    
1250        pcre_callout = callout;
1251        first_callout = 1;
1252        callout_extra = 0;
1253        callout_count = 0;
1254        callout_fail_count = 999999;
1255        callout_fail_id = -1;
1256        show_malloc = 0;
1257    
1258      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1259      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1260        {        {
1261        done = 1;        done = 1;
1262        goto CONTINUE;        goto CONTINUE;
# Line 777  while (!done) Line 1276  while (!done)
1276        {        {
1277        int i = 0;        int i = 0;
1278        int n = 0;        int n = 0;
1279    
1280        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1281          {          {
1282          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 796  while (!done) Line 1296  while (!done)
1296          break;          break;
1297    
1298          case 'x':          case 'x':
1299    
1300            /* Handle \x{..} specially - new Perl thing for utf8 */
1301    
1302            if (*p == '{')
1303              {
1304              unsigned char *pt = p;
1305              c = 0;
1306              while (isxdigit(*(++pt)))
1307                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1308              if (*pt == '}')
1309                {
1310                unsigned char buff8[8];
1311                int ii, utn;
1312                utn = ord2utf8(c, buff8);
1313                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1314                c = buff8[ii];   /* Last byte */
1315                p = pt + 1;
1316                break;
1317                }
1318              /* Not correct form; fall through */
1319              }
1320    
1321            /* Ordinary \x */
1322    
1323          c = 0;          c = 0;
1324          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1325            {            {
# Line 804  while (!done) Line 1328  while (!done)
1328            }            }
1329          break;          break;
1330    
1331          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1332          p--;          p--;
1333          continue;          continue;
1334    
1335            case '>':
1336            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1337            continue;
1338    
1339          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1340          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1341          continue;          continue;
# Line 817  while (!done) Line 1345  while (!done)
1345          continue;          continue;
1346    
1347          case 'C':          case 'C':
1348          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1349          copystrings |= 1 << n;            {
1350              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1351              copystrings |= 1 << n;
1352              }
1353            else if (isalnum(*p))
1354              {
1355              uschar name[256];
1356              uschar *npp = name;
1357              while (isalnum(*p)) *npp++ = *p++;
1358              *npp = 0;
1359              n = pcre_get_stringnumber(re, (char *)name);
1360              if (n < 0)
1361                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1362              else copystrings |= 1 << n;
1363              }
1364            else if (*p == '+')
1365              {
1366              callout_extra = 1;
1367              p++;
1368              }
1369            else if (*p == '-')
1370              {
1371              pcre_callout = NULL;
1372              p++;
1373              }
1374            else if (*p == '!')
1375              {
1376              callout_fail_id = 0;
1377              p++;
1378              while(isdigit(*p))
1379                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1380              callout_fail_count = 0;
1381              if (*p == '!')
1382                {
1383                p++;
1384                while(isdigit(*p))
1385                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1386                }
1387              }
1388            else if (*p == '*')
1389              {
1390              int sign = 1;
1391              callout_data = 0;
1392              if (*(++p) == '-') { sign = -1; p++; }
1393              while(isdigit(*p))
1394                callout_data = callout_data * 10 + *p++ - '0';
1395              callout_data *= sign;
1396              callout_data_set = 1;
1397              }
1398          continue;          continue;
1399    
1400          case 'G':          case 'G':
1401          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1402          getstrings |= 1 << n;            {
1403              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1404              getstrings |= 1 << n;
1405              }
1406            else if (isalnum(*p))
1407              {
1408              uschar name[256];
1409              uschar *npp = name;
1410              while (isalnum(*p)) *npp++ = *p++;
1411              *npp = 0;
1412              n = pcre_get_stringnumber(re, (char *)name);
1413              if (n < 0)
1414                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1415              else getstrings |= 1 << n;
1416              }
1417          continue;          continue;
1418    
1419          case 'L':          case 'L':
1420          getlist = 1;          getlist = 1;
1421          continue;          continue;
1422    
1423            case 'M':
1424            find_match_limit = 1;
1425            continue;
1426    
1427          case 'N':          case 'N':
1428          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1429          continue;          continue;
1430    
1431          case 'O':          case 'O':
1432          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1433          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1434              {
1435              size_offsets_max = n;
1436              free(offsets);
1437              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1438              if (offsets == NULL)
1439                {
1440                printf("** Failed to get %d bytes of memory for offsets vector\n",
1441                  size_offsets_max * sizeof(int));
1442                return 1;
1443                }
1444              }
1445            use_size_offsets = n;
1446            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1447            continue;
1448    
1449            case 'P':
1450            options |= PCRE_PARTIAL;
1451            continue;
1452    
1453            case 'S':
1454            show_malloc = 1;
1455          continue;          continue;
1456    
1457          case 'Z':          case 'Z':
1458          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1459          continue;          continue;
1460    
1461            case '?':
1462            options |= PCRE_NO_UTF8_CHECK;
1463            continue;
1464          }          }
1465        *q++ = c;        *q++ = c;
1466        }        }
# Line 849  while (!done) Line 1468  while (!done)
1468      len = q - dbuffer;      len = q - dbuffer;
1469    
1470      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1471      support timing. */      support timing or playing with the match limit or callout data. */
1472    
1473  #if !defined NOPOSIX  #if !defined NOPOSIX
1474      if (posix || do_posix)      if (posix || do_posix)
1475        {        {
1476        int rc;        int rc;
1477        int eflags = 0;        int eflags = 0;
1478        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
1479          if (use_size_offsets > 0)
1480            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1481        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1482        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1483    
1484        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1485    
1486        if (rc != 0)        if (rc != 0)
1487          {          {
1488          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1489          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1490          }          }
1491        else        else
1492          {          {
1493          size_t i;          size_t i;
1494          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1495            {            {
1496            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1497              {              {
1498              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1499              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1500                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1501              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1502              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1503                {                {
1504                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1505                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1506                    outfile);
1507                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1508                }                }
1509              }              }
1510            }            }
1511          }          }
1512          free(pmatch);
1513        }        }
1514    
1515      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 903  while (!done) Line 1526  while (!done)
1526          clock_t start_time = clock();          clock_t start_time = clock();
1527          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1528            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1529              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1530          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1531          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1532            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1533            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1534          }          }
1535    
1536        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1537          start_offset, options | g_notempty, offsets, size_offsets);        varying limits in order to find the minimum value. */
1538    
1539          if (find_match_limit)
1540            {
1541            int min = 0;
1542            int mid = 64;
1543            int max = -1;
1544    
1545            if (extra == NULL)
1546              {
1547              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1548              extra->flags = 0;
1549              }
1550            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1551    
1552            for (;;)
1553              {
1554              extra->match_limit = mid;
1555              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1556                options | g_notempty, use_offsets, use_size_offsets);
1557              if (count == PCRE_ERROR_MATCHLIMIT)
1558                {
1559                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1560                min = mid;
1561                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1562                }
1563              else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1564                                     count == PCRE_ERROR_PARTIAL)
1565                {
1566                if (mid == min + 1)
1567                  {
1568                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1569                  break;
1570                  }
1571                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1572                max = mid;
1573                mid = (min + mid)/2;
1574                }
1575              else break;    /* Some other error */
1576              }
1577    
1578            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1579            }
1580    
1581          /* If callout_data is set, use the interface with additional data */
1582    
1583          else if (callout_data_set)
1584            {
1585            if (extra == NULL)
1586              {
1587              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1588              extra->flags = 0;
1589              }
1590            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1591            extra->callout_data = &callout_data;
1592            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1593              options | g_notempty, use_offsets, use_size_offsets);
1594            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1595            }
1596    
1597          /* The normal case is just to do the match once, with the default
1598          value of match_limit. */
1599    
1600          else
1601            {
1602            count = pcre_exec(re, extra, (char *)bptr, len,
1603              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1604            }
1605    
1606        if (count == 0)        if (count == 0)
1607          {          {
1608          fprintf(outfile, "Matched, but too many substrings\n");          fprintf(outfile, "Matched, but too many substrings\n");
1609          count = size_offsets/3;          count = use_size_offsets/3;
1610          }          }
1611    
1612        /* Matched */        /* Matched */
# Line 926  while (!done) Line 1616  while (!done)
1616          int i;          int i;
1617          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1618            {            {
1619            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1620              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1621            else            else
1622              {              {
1623              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1624              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1625                  use_offsets[i+1] - use_offsets[i], outfile);
1626              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1627              if (i == 0)              if (i == 0)
1628                {                {
1629                if (do_showrest)                if (do_showrest)
1630                  {                  {
1631                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1632                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1633                      outfile);
1634                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1635                  }                  }
1636                }                }
# Line 950  while (!done) Line 1642  while (!done)
1642            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1643              {              {
1644              char copybuffer[16];              char copybuffer[16];
1645              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1646                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1647              if (rc < 0)              if (rc < 0)
1648                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 964  while (!done) Line 1656  while (!done)
1656            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1657              {              {
1658              const char *substring;              const char *substring;
1659              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1660                i, &substring);                i, &substring);
1661              if (rc < 0)              if (rc < 0)
1662                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
1663              else              else
1664                {                {
1665                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1666                free((void *)substring);                /* free((void *)substring); */
1667                  pcre_free_substring(substring);
1668                }                }
1669              }              }
1670            }            }
# Line 979  while (!done) Line 1672  while (!done)
1672          if (getlist)          if (getlist)
1673            {            {
1674            const char **stringlist;            const char **stringlist;
1675            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1676              &stringlist);              &stringlist);
1677            if (rc < 0)            if (rc < 0)
1678              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 989  while (!done) Line 1682  while (!done)
1682                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1683              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1684                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1685              free((void *)stringlist);              /* free((void *)stringlist); */
1686                pcre_free_substring_list(stringlist);
1687              }              }
1688            }            }
1689          }          }
1690    
1691          /* There was a partial match */
1692    
1693          else if (count == PCRE_ERROR_PARTIAL)
1694            {
1695            fprintf(outfile, "Partial match\n");
1696            break;  /* Out of the /g loop */
1697            }
1698    
1699        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1700        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1701        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1702        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1703        was checked before setting PCRE_NOTEMPTY. */        offset values to achieve this. We won't be at the end of the string -
1704          that was checked before setting g_notempty. */
1705    
1706        else        else
1707          {          {
1708          if (g_notempty != 0)          if (g_notempty != 0)
1709            {            {
1710            offsets[0] = start_offset;            int onechar = 1;
1711            offsets[1] = start_offset + 1;            use_offsets[0] = start_offset;
1712              if (use_utf8)
1713                {
1714                while (start_offset + onechar < len)
1715                  {
1716                  int tb = bptr[start_offset+onechar];
1717                  if (tb <= 127) break;
1718                  tb &= 0xc0;
1719                  if (tb != 0 && tb != 0xc0) onechar++;
1720                  }
1721                }
1722              use_offsets[1] = start_offset + onechar;
1723            }            }
1724          else          else
1725            {            {
1726            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1727              {              {
1728              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1729              }              }
1730              else fprintf(outfile, "Error %d\n", count);
1731            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1732            }            }
1733          }          }
# Line 1025  while (!done) Line 1739  while (!done)
1739        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
1740        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic
1741        what Perl's /g options does. This turns out to be rather cunning. First        what Perl's /g options does. This turns out to be rather cunning. First
1742        we set PCRE_NOTEMPTY and try the match again at the same point. If this        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1743        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
1744          character. */
1745    
1746        g_notempty = 0;        g_notempty = 0;
1747        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
1748          {          {
1749          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
1750          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1751          }          }
1752    
1753        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1754    
1755        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
1756    
1757        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
1758    
1759        else        else
1760          {          {
1761          bptr += offsets[1];          bptr += use_offsets[1];
1762          len -= offsets[1];          len -= use_offsets[1];
1763          }          }
1764        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
1765      }    /* End of loop for data lines */      }    /* End of loop for data lines */
# Line 1064  while (!done) Line 1779  while (!done)
1779      }      }
1780    }    }
1781    
1782  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1783  return 0;  return 0;
1784  }  }
1785    

Legend:
Removed from v.43  
changed lines
  Added in v.75

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12