/[pcre]/code/tags/pcre-6.2/pcretest.c
ViewVC logotype

Diff of /code/tags/pcre-6.2/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 41 by nigel, Sat Feb 24 21:39:17 2007 UTC revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    #define PCRE_SPY        /* For Win32 build, import data, not export */
48    
49    /* We need the internal info for displaying the results of pcre_study() and
50    other internal data; pcretest also uses some of the fixed tables, and generally
51    has "inside information" compared to a program that strictly follows the PCRE
52    API. */
53    
54  /* Use the internal info for displaying the results of pcre_study(). */  #include "pcre_internal.h"
55    
 #include "internal.h"  
56    
57  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
58  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 29  Makefile. */ Line 70  Makefile. */
70  #endif  #endif
71  #endif  #endif
72    
73  #define LOOPREPEAT 20000  #define LOOPREPEAT 500000
74    
75    #define BUFFER_SIZE 30000
76    #define PBUFFER_SIZE BUFFER_SIZE
77    #define DBUFFER_SIZE BUFFER_SIZE
78    
79    
80  static FILE *outfile;  static FILE *outfile;
81  static int log_store = 0;  static int log_store = 0;
82    static int callout_count;
83    static int callout_extra;
84    static int callout_fail_count;
85    static int callout_fail_id;
86    static int first_callout;
87    static int show_malloc;
88    static int use_utf8;
89    static size_t gotten_store;
90    
91    static uschar *pbuffer = NULL;
92    
93    
94    
95  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
96  code as contained in pcre.c under the DEBUG macro. */  *          Read number from string               *
97    *************************************************/
98    
99  static const char *OP_names[] = {  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
100    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  around with conditional compilation, just do the job by hand. It is only used
101    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  for unpicking the -o argument, so just keep it simple.
102    "Opt", "^", "$", "Any", "chars", "not",  
103    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  Arguments:
104    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    str           string to be converted
105    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    endptr        where to put the end pointer
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",  
   "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",  
   "Brazero", "Braminzero", "Bra"  
 };  
106    
107    Returns:        the converted value, as an int
108    */
109    
110  static void print_internals(pcre *re)  static int
111    get_value(unsigned char *str, unsigned char **endptr)
112  {  {
113  unsigned char *code = ((real_pcre *)re)->code;  int result = 0;
114    while(*str != 0 && isspace(*str)) str++;
115    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
116    *endptr = str;
117    return(result);
118    }
119    
120    
121    
122    
123    /*************************************************
124    *            Convert UTF-8 string to value       *
125    *************************************************/
126    
127  fprintf(outfile, "------------------------------------------------------------------\n");  /* This function takes one or more bytes that represents a UTF-8 character,
128    and returns the value of the character.
129    
130  for(;;)  Arguments:
131      buffer   a pointer to the byte vector
132      vptr     a pointer to an int to receive the value
133    
134    Returns:   >  0 => the number of bytes consumed
135               -6 to 0 => malformed UTF-8 character at offset = (-return)
136    */
137    
138    static int
139    utf82ord(unsigned char *buffer, int *vptr)
140    {
141    int c = *buffer++;
142    int d = c;
143    int i, j, s;
144    
145    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
146    {    {
147    int c;    if ((d & 0x80) == 0) break;
148    int charlength;    d <<= 1;
149      }
150    
151    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
152    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
153    
154    if (*code >= OP_BRA)  /* i now has a value in the range 1-5 */
     {  
     fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
155    
156    else switch(*code)  s = 6*i;
157      {  d = (c & _pcre_utf8_table3[i]) << s;
158      case OP_END:  
159      fprintf(outfile, "    %s\n", OP_names[*code]);  for (j = 0; j < i; j++)
160      fprintf(outfile, "------------------------------------------------------------------\n");    {
161      return;    c = *buffer++;
162      if ((c & 0xc0) != 0x80) return -(j+1);
163      case OP_OPT:    s -= 6;
164      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);    d |= (c & 0x3f) << s;
165      code++;    }
     break;  
   
     case OP_COND:  
     fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  
     code += 2;  
     break;  
   
     case OP_CREF:  
     fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  
     code++;  
     break;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     fprintf(outfile, "%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
         else fprintf(outfile, "\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ASSERTBACK:  
     case OP_ASSERTBACK_NOT:  
     case OP_ONCE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_REVERSE:  
     fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       fprintf(outfile, "    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
       else fprintf(outfile, "    \\x%02x", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  
       else fprintf(outfile, "    \\x%02x{", c);  
     if (*code != OP_EXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     fprintf(outfile, "    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
166    
167        CLASS_REF_REPEAT:  /* Check that encoding was the correct unique one */
168    
169        switch(*code)  for (j = 0; j < _pcre_utf8_table1_size; j++)
170          {    if (d <= _pcre_utf8_table1[j]) break;
171          case OP_CRSTAR:  if (j != i) return -(i+1);
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
172    
173          case OP_CRRANGE:  /* Valid value */
174          case OP_CRMINRANGE:  
175          min = (code[1] << 8) + code[2];  *vptr = d;
176          max = (code[3] << 8) + code[4];  return i+1;
177          if (max == 0) fprintf(outfile, "{%d,}", min);  }
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
178    
179          default:  
180          code--;  
181    /*************************************************
182    *             Print character string             *
183    *************************************************/
184    
185    /* Character string printing function. Must handle UTF-8 strings in utf8
186    mode. Yields number of characters printed. If handed a NULL file, just counts
187    chars without printing. */
188    
189    static int pchars(unsigned char *p, int length, FILE *f)
190    {
191    int c;
192    int yield = 0;
193    
194    while (length-- > 0)
195      {
196      if (use_utf8)
197        {
198        int rc = utf82ord(p, &c);
199    
200        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
201          {
202          length -= rc - 1;
203          p += rc;
204          if (c < 256 && isprint(c))
205            {
206            if (f != NULL) fprintf(f, "%c", c);
207            yield++;
208            }
209          else
210            {
211            int n;
212            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
213            yield += n;
214          }          }
215          continue;
216        }        }
217      break;      }
218    
219      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
220    
221      default:    if (isprint(c = *(p++)))
222      fprintf(outfile, "    %s", OP_names[*code]);      {
223      break;      if (f != NULL) fprintf(f, "%c", c);
224        yield++;
225        }
226      else
227        {
228        if (f != NULL) fprintf(f, "\\x%02x", c);
229        yield += 4;
230      }      }
   
   code++;  
   fprintf(outfile, "\n");  
231    }    }
232    
233    return yield;
234  }  }
235    
236    
237    
238  /* Character string printing function. */  /*************************************************
239    *              Callout function                  *
240    *************************************************/
241    
242  static void pchars(unsigned char *p, int length)  /* Called from PCRE as a result of the (?C) item. We print out where we are in
243    the match. Yield zero unless more callouts than the fail count, or the callout
244    data is not zero. */
245    
246    static int callout(pcre_callout_block *cb)
247  {  {
248  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
249  while (length-- > 0)  int i, pre_start, post_start, subject_length;
250    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
251      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
252      {
253      fprintf(f, "Callout %d: last capture = %d\n",
254        cb->callout_number, cb->capture_last);
255    
256      for (i = 0; i < cb->capture_top * 2; i += 2)
257        {
258        if (cb->offset_vector[i] < 0)
259          fprintf(f, "%2d: <unset>\n", i/2);
260        else
261          {
262          fprintf(f, "%2d: ", i/2);
263          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
264            cb->offset_vector[i+1] - cb->offset_vector[i], f);
265          fprintf(f, "\n");
266          }
267        }
268      }
269    
270    /* Re-print the subject in canonical form, the first time or if giving full
271    details. On subsequent calls in the same match, we use pchars just to find the
272    printed lengths of the substrings. */
273    
274    if (f != NULL) fprintf(f, "--->");
275    
276    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
277    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
278      cb->current_position - cb->start_match, f);
279    
280    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
281    
282    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
283      cb->subject_length - cb->current_position, f);
284    
285    if (f != NULL) fprintf(f, "\n");
286    
287    /* Always print appropriate indicators, with callout number if not already
288    shown. For automatic callouts, show the pattern offset. */
289    
290    if (cb->callout_number == 255)
291      {
292      fprintf(outfile, "%+3d ", cb->pattern_position);
293      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
294      }
295    else
296      {
297      if (callout_extra) fprintf(outfile, "    ");
298        else fprintf(outfile, "%3d ", cb->callout_number);
299      }
300    
301    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
302    fprintf(outfile, "^");
303    
304    if (post_start > 0)
305      {
306      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
307      fprintf(outfile, "^");
308      }
309    
310    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
311      fprintf(outfile, " ");
312    
313    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
314      pbuffer + cb->pattern_position);
315    
316    fprintf(outfile, "\n");
317    first_callout = 0;
318    
319    if (cb->callout_data != NULL)
320      {
321      int callout_data = *((int *)(cb->callout_data));
322      if (callout_data != 0)
323        {
324        fprintf(outfile, "Callout data = %d\n", callout_data);
325        return callout_data;
326        }
327      }
328    
329    return (cb->callout_number != callout_fail_id)? 0 :
330           (++callout_count >= callout_fail_count)? 1 : 0;
331  }  }
332    
333    
334    /*************************************************
335    *            Local malloc functions              *
336    *************************************************/
337    
338  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
339  compiled re. */  compiled re. */
340    
341  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
342  {  {
343  if (log_store)  void *block = malloc(size);
344    fprintf(outfile, "Memory allocation (code space): %d\n",  gotten_store = size;
345      (int)((int)size - offsetof(real_pcre, code[0])));  if (show_malloc)
346  return malloc(size);    fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
347    return block;
348    }
349    
350    static void new_free(void *block)
351    {
352    if (show_malloc)
353      fprintf(outfile, "free             %p\n", block);
354    free(block);
355  }  }
356    
357    
358    /* For recursion malloc/free, to test stacking calls */
359    
360    static void *stack_malloc(size_t size)
361    {
362    void *block = malloc(size);
363    if (show_malloc)
364      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
365    return block;
366    }
367    
368    static void stack_free(void *block)
369    {
370    if (show_malloc)
371      fprintf(outfile, "stack_free       %p\n", block);
372    free(block);
373    }
374    
375    
376    /*************************************************
377    *          Call pcre_fullinfo()                  *
378    *************************************************/
379    
380    /* Get one piece of information from the pcre_fullinfo() function */
381    
382    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
383    {
384    int rc;
385    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
386      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
387    }
388    
389    
390    
391    /*************************************************
392    *         Byte flipping function                 *
393    *************************************************/
394    
395    static long int
396    byteflip(long int value, int n)
397    {
398    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
399    return ((value & 0x000000ff) << 24) |
400           ((value & 0x0000ff00) <<  8) |
401           ((value & 0x00ff0000) >>  8) |
402           ((value & 0xff000000) >> 24);
403    }
404    
405    
406    
407    
408    /*************************************************
409    *                Main Program                    *
410    *************************************************/
411    
412  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
413  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 302  int op = 1; Line 422  int op = 1;
422  int timeit = 0;  int timeit = 0;
423  int showinfo = 0;  int showinfo = 0;
424  int showstore = 0;  int showstore = 0;
425    int size_offsets = 45;
426    int size_offsets_max;
427    int *offsets = NULL;
428    #if !defined NOPOSIX
429  int posix = 0;  int posix = 0;
430    #endif
431  int debug = 0;  int debug = 0;
432  int done = 0;  int done = 0;
433  unsigned char buffer[30000];  int all_use_dfa = 0;
434  unsigned char dbuffer[1024];  int yield = 0;
435    
436    unsigned char *buffer;
437    unsigned char *dbuffer;
438    
439  /* Static so that new_malloc can use it. */  /* Get buffers from malloc() so that Electric Fence will check their misuse
440    when I am debugging. */
441    
442    buffer = (unsigned char *)malloc(BUFFER_SIZE);
443    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
444    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
445    
446    /* The outfile variable is static so that new_malloc can use it. The _setmode()
447    stuff is some magic that I don't understand, but which apparently does good
448    things in Windows. It's related to line terminations.  */
449    
450    #if defined(_WIN32) || defined(WIN32)
451    _setmode( _fileno( stdout ), 0x8000 );
452    #endif  /* defined(_WIN32) || defined(WIN32) */
453    
454  outfile = stdout;  outfile = stdout;
455    
# Line 316  outfile = stdout; Line 457  outfile = stdout;
457    
458  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
459    {    {
460      unsigned char *endptr;
461    
462    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
463      showstore = 1;      showstore = 1;
464    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
465    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
466    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
467      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
468      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
469          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
470            *endptr == 0))
471        {
472        op++;
473        argc--;
474        }
475    #if !defined NOPOSIX
476    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
477    #endif
478      else if (strcmp(argv[op], "-C") == 0)
479        {
480        int rc;
481        printf("PCRE version %s\n", pcre_version());
482        printf("Compiled with\n");
483        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
484        printf("  %sUTF-8 support\n", rc? "" : "No ");
485        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
486        printf("  %sUnicode properties support\n", rc? "" : "No ");
487        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
488        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
489        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
490        printf("  Internal link size = %d\n", rc);
491        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
492        printf("  POSIX malloc threshold = %d\n", rc);
493        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
494        printf("  Default match limit = %d\n", rc);
495        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
496        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
497        exit(0);
498        }
499    else    else
500      {      {
501      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
502      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
503      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
504             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n");
505             "  -p   use POSIX interface\n"      printf("  -dfa   force DFA matching for all subjects\n");
506             "  -s   output store information\n"      printf("  -i     show information about compiled pattern\n"
507             "  -t   time compilation and execution\n");             "  -m     output memory used information\n"
508      return 1;             "  -o <n> set size of offsets vector to <n>\n");
509    #if !defined NOPOSIX
510        printf("  -p     use POSIX interface\n");
511    #endif
512        printf("  -s     output store (memory) used information\n"
513               "  -t     time compilation and execution\n");
514        yield = 1;
515        goto EXIT;
516      }      }
517    op++;    op++;
518    argc--;    argc--;
519    }    }
520    
521    /* Get the store for the offsets vector, and remember what it was */
522    
523    size_offsets_max = size_offsets;
524    offsets = (int *)malloc(size_offsets_max * sizeof(int));
525    if (offsets == NULL)
526      {
527      printf("** Failed to get %d bytes of memory for offsets vector\n",
528        size_offsets_max * sizeof(int));
529      yield = 1;
530      goto EXIT;
531      }
532    
533  /* Sort out the input and output files */  /* Sort out the input and output files */
534    
535  if (argc > 1)  if (argc > 1)
536    {    {
537    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
538    if (infile == NULL)    if (infile == NULL)
539      {      {
540      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
541      return 1;      yield = 1;
542        goto EXIT;
543      }      }
544    }    }
545    
546  if (argc > 2)  if (argc > 2)
547    {    {
548    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
549    if (outfile == NULL)    if (outfile == NULL)
550      {      {
551      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
552      return 1;      yield = 1;
553        goto EXIT;
554      }      }
555    }    }
556    
557  /* Set alternative malloc function */  /* Set alternative malloc function */
558    
559  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
560    pcre_free = new_free;
561    pcre_stack_malloc = stack_malloc;
562    pcre_stack_free = stack_free;
563    
564  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
565    
# Line 376  while (!done) Line 574  while (!done)
574    
575  #if !defined NOPOSIX  /* There are still compilers that require no indent */  #if !defined NOPOSIX  /* There are still compilers that require no indent */
576    regex_t preg;    regex_t preg;
577      int do_posix = 0;
578  #endif  #endif
579    
580    const char *error;    const char *error;
581    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
582    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
583      const unsigned char *tables = NULL;
584      unsigned long int true_size, true_study_size = 0;
585      size_t size, regex_gotten_store;
586    int do_study = 0;    int do_study = 0;
587    int do_debug = debug;    int do_debug = debug;
588    int do_G = 0;    int do_G = 0;
589    int do_g = 0;    int do_g = 0;
590    int do_showinfo = showinfo;    int do_showinfo = showinfo;
591    int do_showrest = 0;    int do_showrest = 0;
592    int do_posix = 0;    int do_flip = 0;
593    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
594    
595      use_utf8 = 0;
596    
597    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
598    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
599    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
600      fflush(outfile);
601    
602    p = buffer;    p = buffer;
603    while (isspace(*p)) p++;    while (isspace(*p)) p++;
604    if (*p == 0) continue;    if (*p == 0) continue;
605    
606    /* Get the delimiter and seek the end of the pattern; if it isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
607    complete, read more. */  
608      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
609        {
610        unsigned long int magic;
611        uschar sbuf[8];
612        FILE *f;
613    
614        p++;
615        pp = p + (int)strlen((char *)p);
616        while (isspace(pp[-1])) pp--;
617        *pp = 0;
618    
619        f = fopen((char *)p, "rb");
620        if (f == NULL)
621          {
622          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
623          continue;
624          }
625    
626        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
627    
628        true_size =
629          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
630        true_study_size =
631          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
632    
633        re = (real_pcre *)new_malloc(true_size);
634        regex_gotten_store = gotten_store;
635    
636        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
637    
638        magic = ((real_pcre *)re)->magic_number;
639        if (magic != MAGIC_NUMBER)
640          {
641          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
642            {
643            do_flip = 1;
644            }
645          else
646            {
647            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
648            fclose(f);
649            continue;
650            }
651          }
652    
653        fprintf(outfile, "Compiled regex%s loaded from %s\n",
654          do_flip? " (byte-inverted)" : "", p);
655    
656        /* Need to know if UTF-8 for printing data strings */
657    
658        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
659        use_utf8 = (options & PCRE_UTF8) != 0;
660    
661        /* Now see if there is any following study data */
662    
663        if (true_study_size != 0)
664          {
665          pcre_study_data *psd;
666    
667          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
668          extra->flags = PCRE_EXTRA_STUDY_DATA;
669    
670          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
671          extra->study_data = psd;
672    
673          if (fread(psd, 1, true_study_size, f) != true_study_size)
674            {
675            FAIL_READ:
676            fprintf(outfile, "Failed to read data from %s\n", p);
677            if (extra != NULL) new_free(extra);
678            if (re != NULL) new_free(re);
679            fclose(f);
680            continue;
681            }
682          fprintf(outfile, "Study data loaded from %s\n", p);
683          do_study = 1;     /* To get the data output if requested */
684          }
685        else fprintf(outfile, "No study data\n");
686    
687        fclose(f);
688        goto SHOW_INFO;
689        }
690    
691      /* In-line pattern (the usual case). Get the delimiter and seek the end of
692      the pattern; if it isn't complete, read more. */
693    
694    delimiter = *p++;    delimiter = *p++;
695    
# Line 421  while (!done) Line 711  while (!done)
711        }        }
712      if (*pp != 0) break;      if (*pp != 0) break;
713    
714      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
715      if (len < 256)      if (len < 256)
716        {        {
717        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 444  while (!done) Line 734  while (!done)
734    
735    if (pp[1] == '\\') *pp++ = '\\';    if (pp[1] == '\\') *pp++ = '\\';
736    
737    /* Terminate the pattern at the delimiter */    /* Terminate the pattern at the delimiter, and save a copy of the pattern
738      for callouts. */
739    
740    *pp++ = 0;    *pp++ = 0;
741      strcpy((char *)pbuffer, (char *)p);
742    
743    /* Look for options after final delimiter */    /* Look for options after final delimiter */
744    
# Line 458  while (!done) Line 750  while (!done)
750      {      {
751      switch (*pp++)      switch (*pp++)
752        {        {
753          case 'f': options |= PCRE_FIRSTLINE; break;
754        case 'g': do_g = 1; break;        case 'g': do_g = 1; break;
755        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
756        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
# Line 466  while (!done) Line 759  while (!done)
759    
760        case '+': do_showrest = 1; break;        case '+': do_showrest = 1; break;
761        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
762          case 'C': options |= PCRE_AUTO_CALLOUT; break;
763        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
764        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
765          case 'F': do_flip = 1; break;
766        case 'G': do_G = 1; break;        case 'G': do_G = 1; break;
767        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
768        case 'M': log_store = 1; break;        case 'M': log_store = 1; break;
769          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
770    
771  #if !defined NOPOSIX  #if !defined NOPOSIX
772        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
# Line 479  while (!done) Line 775  while (!done)
775        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
776        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
777        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
778          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
779          case '?': options |= PCRE_NO_UTF8_CHECK; break;
780    
781        case 'L':        case 'L':
782        ppp = pp;        ppp = pp;
783        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
784          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
785        *ppp = 0;        *ppp = 0;
786        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
787          {          {
# Line 493  while (!done) Line 792  while (!done)
792        pp = ppp;        pp = ppp;
793        break;        break;
794    
795        case '\n': case ' ': break;        case '>':
796          to_file = pp;
797          while (*pp != 0) pp++;
798          while (isspace(pp[-1])) pp--;
799          *pp = 0;
800          break;
801    
802          case '\r':                      /* So that it works in Windows */
803          case '\n':
804          case ' ':
805          break;
806    
807        default:        default:
808        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
809        goto SKIP_DATA;        goto SKIP_DATA;
# Line 509  while (!done) Line 819  while (!done)
819      {      {
820      int rc;      int rc;
821      int cflags = 0;      int cflags = 0;
822    
823      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
824      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
825        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
826      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
827    
828      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 518  while (!done) Line 830  while (!done)
830    
831      if (rc != 0)      if (rc != 0)
832        {        {
833        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
834        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
835        goto SKIP_DATA;        goto SKIP_DATA;
836        }        }
# Line 542  while (!done) Line 854  while (!done)
854          }          }
855        time_taken = clock() - start_time;        time_taken = clock() - start_time;
856        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
857          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
858          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
859        }        }
860    
861      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 559  while (!done) Line 871  while (!done)
871          {          {
872          for (;;)          for (;;)
873            {            {
874            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
875              {              {
876              done = 1;              done = 1;
877              goto CONTINUE;              goto CONTINUE;
# Line 573  while (!done) Line 885  while (!done)
885        goto CONTINUE;        goto CONTINUE;
886        }        }
887    
888      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
889        info-returning functions. The old one has a limited interface and
890      if (do_showinfo)      returns only limited data. Check that it agrees with the newer one. */
891        {  
892        int first_char, count;      if (log_store)
893          fprintf(outfile, "Memory allocation (code space): %d\n",
894        if (do_debug) print_internals(re);          (int)(gotten_store -
895                  sizeof(real_pcre) -
896        count = pcre_info(re, &options, &first_char);                ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
       if (count < 0) fprintf(outfile,  
         "Error %d while reading info\n", count);  
       else  
         {  
         fprintf(outfile, "Identifying subpattern count = %d\n", count);  
         if (options == 0) fprintf(outfile, "No options\n");  
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
   
         if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)  
           fprintf(outfile, "Case state changes\n");  
897    
898          if (first_char == -1)      /* Extract the size for possible writing before possibly flipping it,
899            {      and remember the store that was got. */
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
         else  
           {  
           if (isprint(first_char))  
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
900    
901          if (((((real_pcre *)re)->options) & PCRE_REQCHSET) != 0)      true_size = ((real_pcre *)re)->size;
902            {      regex_gotten_store = gotten_store;
           int req_char = ((real_pcre *)re)->req_char;  
           if (isprint(req_char))  
             fprintf(outfile, "Req char = \'%c\'\n", req_char);  
           else  
             fprintf(outfile, "Req char = %d\n", req_char);  
           }  
         else fprintf(outfile, "No req char\n");  
         }  
       }  
903    
904      /* If /S was present, study the regexp to generate additional info to      /* If /S was present, study the regexp to generate additional info to
905      help with the matching. */      help with the matching. */
# Line 644  while (!done) Line 916  while (!done)
916          time_taken = clock() - start_time;          time_taken = clock() - start_time;
917          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
918          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
919            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
920            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
921          }          }
   
922        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
923        if (error != NULL)        if (error != NULL)
924          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
925        else if (extra == NULL)        else if (extra != NULL)
926          fprintf(outfile, "Study returned NULL\n");          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
927          }
928    
929        /* If the 'F' option was present, we flip the bytes of all the integer
930        fields in the regex data block and the study block. This is to make it
931        possible to test PCRE's handling of byte-flipped patterns, e.g. those
932        compiled on a different architecture. */
933    
934        if (do_flip)
935          {
936          real_pcre *rre = (real_pcre *)re;
937          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
938          rre->size = byteflip(rre->size, sizeof(rre->size));
939          rre->options = byteflip(rre->options, sizeof(rre->options));
940          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
941          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
942          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
943          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
944          rre->name_table_offset = byteflip(rre->name_table_offset,
945            sizeof(rre->name_table_offset));
946          rre->name_entry_size = byteflip(rre->name_entry_size,
947            sizeof(rre->name_entry_size));
948          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
949    
950          if (extra != NULL)
951            {
952            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
953            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
954            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
955            }
956          }
957    
958        /* Extract information from the compiled data if required */
959    
960        SHOW_INFO:
961    
962        if (do_showinfo)
963          {
964          unsigned long int get_options, all_options;
965          int old_first_char, old_options, old_count;
966          int count, backrefmax, first_char, need_char;
967          int nameentrysize, namecount;
968          const uschar *nametable;
969    
970          if (do_debug)
971            {
972            fprintf(outfile, "------------------------------------------------------------------\n");
973            _pcre_printint(re, outfile);
974            }
975    
976        /* This looks at internal information. A bit kludgy to do it this        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
977        way, but it is useful for testing. */        new_info(re, NULL, PCRE_INFO_SIZE, &size);
978          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
979          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
980          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
981          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
982          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
983          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
984          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
985    
986        else if (do_showinfo)        old_count = pcre_info(re, &old_options, &old_first_char);
987          if (count < 0) fprintf(outfile,
988            "Error %d from pcre_info()\n", count);
989          else
990          {          {
991          real_pcre_extra *xx = (real_pcre_extra *)extra;          if (old_count != count) fprintf(outfile,
992          if ((xx->options & PCRE_STUDY_MAPPED) == 0)            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
993            fprintf(outfile, "No starting character set\n");              old_count);
994    
995            if (old_first_char != first_char) fprintf(outfile,
996              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
997                first_char, old_first_char);
998    
999            if (old_options != (int)get_options) fprintf(outfile,
1000              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1001                get_options, old_options);
1002            }
1003    
1004          if (size != regex_gotten_store) fprintf(outfile,
1005            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1006            (int)size, (int)regex_gotten_store);
1007    
1008          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1009          if (backrefmax > 0)
1010            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1011    
1012          if (namecount > 0)
1013            {
1014            fprintf(outfile, "Named capturing subpatterns:\n");
1015            while (namecount-- > 0)
1016              {
1017              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1018                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1019                GET2(nametable, 0));
1020              nametable += nameentrysize;
1021              }
1022            }
1023    
1024          /* The NOPARTIAL bit is a private bit in the options, so we have
1025          to fish it out via our back door */
1026    
1027          all_options = ((real_pcre *)re)->options;
1028          if (do_flip)
1029            {
1030            all_options = byteflip(all_options, sizeof(all_options));
1031            }
1032    
1033          if ((all_options & PCRE_NOPARTIAL) != 0)
1034            fprintf(outfile, "Partial matching not supported\n");
1035    
1036          if (get_options == 0) fprintf(outfile, "No options\n");
1037            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1038              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1039              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1040              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1041              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1042              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1043              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1044              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1045              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1046              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1047              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1048              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1049    
1050          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1051            fprintf(outfile, "Case state changes\n");
1052    
1053          if (first_char == -1)
1054            {
1055            fprintf(outfile, "First char at start or follows \\n\n");
1056            }
1057          else if (first_char < 0)
1058            {
1059            fprintf(outfile, "No first char\n");
1060            }
1061          else
1062            {
1063            int ch = first_char & 255;
1064            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1065              "" : " (caseless)";
1066            if (isprint(ch))
1067              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1068            else
1069              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1070            }
1071    
1072          if (need_char < 0)
1073            {
1074            fprintf(outfile, "No need char\n");
1075            }
1076          else
1077            {
1078            int ch = need_char & 255;
1079            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1080              "" : " (caseless)";
1081            if (isprint(ch))
1082              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1083            else
1084              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1085            }
1086    
1087          /* Don't output study size; at present it is in any case a fixed
1088          value, but it varies, depending on the computer architecture, and
1089          so messes up the test suite. (And with the /F option, it might be
1090          flipped.) */
1091    
1092          if (do_study)
1093            {
1094            if (extra == NULL)
1095              fprintf(outfile, "Study returned NULL\n");
1096          else          else
1097            {            {
1098            int i;            uschar *start_bits = NULL;
1099            int c = 24;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1100            fprintf(outfile, "Starting character set: ");  
1101            for (i = 0; i < 256; i++)            if (start_bits == NULL)
1102                fprintf(outfile, "No starting byte set\n");
1103              else
1104              {              {
1105              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              int i;
1106                int c = 24;
1107                fprintf(outfile, "Starting byte set: ");
1108                for (i = 0; i < 256; i++)
1109                {                {
1110                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
1111                  {                  {
1112                  fprintf(outfile, "\n  ");                  if (c > 75)
1113                  c = 2;                    {
1114                  }                    fprintf(outfile, "\n  ");
1115                if (isprint(i) && i != ' ')                    c = 2;
1116                  {                    }
1117                  fprintf(outfile, "%c ", i);                  if (isprint(i) && i != ' ')
1118                  c += 2;                    {
1119                  }                    fprintf(outfile, "%c ", i);
1120                else                    c += 2;
1121                  {                    }
1122                  fprintf(outfile, "\\x%02x ", i);                  else
1123                  c += 5;                    {
1124                      fprintf(outfile, "\\x%02x ", i);
1125                      c += 5;
1126                      }
1127                  }                  }
1128                }                }
1129                fprintf(outfile, "\n");
1130              }              }
           fprintf(outfile, "\n");  
1131            }            }
1132          }          }
1133        }        }
1134      }  
1135        /* If the '>' option was present, we write out the regex to a file, and
1136        that is all. The first 8 bytes of the file are the regex length and then
1137        the study length, in big-endian order. */
1138    
1139        if (to_file != NULL)
1140          {
1141          FILE *f = fopen((char *)to_file, "wb");
1142          if (f == NULL)
1143            {
1144            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1145            }
1146          else
1147            {
1148            uschar sbuf[8];
1149            sbuf[0] = (true_size >> 24)  & 255;
1150            sbuf[1] = (true_size >> 16)  & 255;
1151            sbuf[2] = (true_size >>  8)  & 255;
1152            sbuf[3] = (true_size)  & 255;
1153    
1154            sbuf[4] = (true_study_size >> 24)  & 255;
1155            sbuf[5] = (true_study_size >> 16)  & 255;
1156            sbuf[6] = (true_study_size >>  8)  & 255;
1157            sbuf[7] = (true_study_size)  & 255;
1158    
1159            if (fwrite(sbuf, 1, 8, f) < 8 ||
1160                fwrite(re, 1, true_size, f) < true_size)
1161              {
1162              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1163              }
1164            else
1165              {
1166              fprintf(outfile, "Compiled regex written to %s\n", to_file);
1167              if (extra != NULL)
1168                {
1169                if (fwrite(extra->study_data, 1, true_study_size, f) <
1170                    true_study_size)
1171                  {
1172                  fprintf(outfile, "Write error on %s: %s\n", to_file,
1173                    strerror(errno));
1174                  }
1175                else fprintf(outfile, "Study data written to %s\n", to_file);
1176                }
1177              }
1178            fclose(f);
1179            }
1180    
1181          new_free(re);
1182          if (extra != NULL) new_free(extra);
1183          if (tables != NULL) new_free((void *)tables);
1184          continue;  /* With next regex */
1185          }
1186        }        /* End of non-POSIX compile */
1187    
1188    /* Read data lines and test them */    /* Read data lines and test them */
1189    
# Line 700  while (!done) Line 1191  while (!done)
1191      {      {
1192      unsigned char *q;      unsigned char *q;
1193      unsigned char *bptr = dbuffer;      unsigned char *bptr = dbuffer;
1194        int *use_offsets = offsets;
1195        int use_size_offsets = size_offsets;
1196        int callout_data = 0;
1197        int callout_data_set = 0;
1198      int count, c;      int count, c;
1199      int copystrings = 0;      int copystrings = 0;
1200        int find_match_limit = 0;
1201      int getstrings = 0;      int getstrings = 0;
1202      int getlist = 0;      int getlist = 0;
1203      int gmatched = 0;      int gmatched = 0;
1204      int start_offset = 0;      int start_offset = 0;
1205      int g_notempty = 0;      int g_notempty = 0;
1206      int offsets[45];      int use_dfa = 0;
     int size_offsets = sizeof(offsets)/sizeof(int);  
1207    
1208      options = 0;      options = 0;
1209    
1210        pcre_callout = callout;
1211        first_callout = 1;
1212        callout_extra = 0;
1213        callout_count = 0;
1214        callout_fail_count = 999999;
1215        callout_fail_id = -1;
1216        show_malloc = 0;
1217    
1218      if (infile == stdin) printf("data> ");      if (infile == stdin) printf("data> ");
1219      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1220        {        {
1221        done = 1;        done = 1;
1222        goto CONTINUE;        goto CONTINUE;
# Line 733  while (!done) Line 1236  while (!done)
1236        {        {
1237        int i = 0;        int i = 0;
1238        int n = 0;        int n = 0;
1239    
1240        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1241          {          {
1242          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 752  while (!done) Line 1256  while (!done)
1256          break;          break;
1257    
1258          case 'x':          case 'x':
1259    
1260            /* Handle \x{..} specially - new Perl thing for utf8 */
1261    
1262            if (*p == '{')
1263              {
1264              unsigned char *pt = p;
1265              c = 0;
1266              while (isxdigit(*(++pt)))
1267                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1268              if (*pt == '}')
1269                {
1270                unsigned char buff8[8];
1271                int ii, utn;
1272                utn = _pcre_ord2utf8(c, buff8);
1273                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1274                c = buff8[ii];   /* Last byte */
1275                p = pt + 1;
1276                break;
1277                }
1278              /* Not correct form; fall through */
1279              }
1280    
1281            /* Ordinary \x */
1282    
1283          c = 0;          c = 0;
1284          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1285            {            {
# Line 760  while (!done) Line 1288  while (!done)
1288            }            }
1289          break;          break;
1290    
1291          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1292          p--;          p--;
1293          continue;          continue;
1294    
1295            case '>':
1296            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1297            continue;
1298    
1299          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1300          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1301          continue;          continue;
# Line 773  while (!done) Line 1305  while (!done)
1305          continue;          continue;
1306    
1307          case 'C':          case 'C':
1308          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))    /* Set copy string */
1309          copystrings |= 1 << n;            {
1310              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1311              copystrings |= 1 << n;
1312              }
1313            else if (isalnum(*p))
1314              {
1315              uschar name[256];
1316              uschar *npp = name;
1317              while (isalnum(*p)) *npp++ = *p++;
1318              *npp = 0;
1319              n = pcre_get_stringnumber(re, (char *)name);
1320              if (n < 0)
1321                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1322              else copystrings |= 1 << n;
1323              }
1324            else if (*p == '+')
1325              {
1326              callout_extra = 1;
1327              p++;
1328              }
1329            else if (*p == '-')
1330              {
1331              pcre_callout = NULL;
1332              p++;
1333              }
1334            else if (*p == '!')
1335              {
1336              callout_fail_id = 0;
1337              p++;
1338              while(isdigit(*p))
1339                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1340              callout_fail_count = 0;
1341              if (*p == '!')
1342                {
1343                p++;
1344                while(isdigit(*p))
1345                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1346                }
1347              }
1348            else if (*p == '*')
1349              {
1350              int sign = 1;
1351              callout_data = 0;
1352              if (*(++p) == '-') { sign = -1; p++; }
1353              while(isdigit(*p))
1354                callout_data = callout_data * 10 + *p++ - '0';
1355              callout_data *= sign;
1356              callout_data_set = 1;
1357              }
1358            continue;
1359    
1360            case 'D':
1361            if (posix || do_posix)
1362              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1363            else
1364              use_dfa = 1;
1365            continue;
1366    
1367            case 'F':
1368            options |= PCRE_DFA_SHORTEST;
1369          continue;          continue;
1370    
1371          case 'G':          case 'G':
1372          while(isdigit(*p)) n = n * 10 + *p++ - '0';          if (isdigit(*p))
1373          getstrings |= 1 << n;            {
1374              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1375              getstrings |= 1 << n;
1376              }
1377            else if (isalnum(*p))
1378              {
1379              uschar name[256];
1380              uschar *npp = name;
1381              while (isalnum(*p)) *npp++ = *p++;
1382              *npp = 0;
1383              n = pcre_get_stringnumber(re, (char *)name);
1384              if (n < 0)
1385                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1386              else getstrings |= 1 << n;
1387              }
1388          continue;          continue;
1389    
1390          case 'L':          case 'L':
1391          getlist = 1;          getlist = 1;
1392          continue;          continue;
1393    
1394            case 'M':
1395            find_match_limit = 1;
1396            continue;
1397    
1398          case 'N':          case 'N':
1399          options |= PCRE_NOTEMPTY;          options |= PCRE_NOTEMPTY;
1400          continue;          continue;
1401    
1402          case 'O':          case 'O':
1403          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1404          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1405              {
1406              size_offsets_max = n;
1407              free(offsets);
1408              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1409              if (offsets == NULL)
1410                {
1411                printf("** Failed to get %d bytes of memory for offsets vector\n",
1412                  size_offsets_max * sizeof(int));
1413                yield = 1;
1414                goto EXIT;
1415                }
1416              }
1417            use_size_offsets = n;
1418            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1419            continue;
1420    
1421            case 'P':
1422            options |= PCRE_PARTIAL;
1423            continue;
1424    
1425            case 'R':
1426            options |= PCRE_DFA_RESTART;
1427            continue;
1428    
1429            case 'S':
1430            show_malloc = 1;
1431          continue;          continue;
1432    
1433          case 'Z':          case 'Z':
1434          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1435          continue;          continue;
1436    
1437            case '?':
1438            options |= PCRE_NO_UTF8_CHECK;
1439            continue;
1440          }          }
1441        *q++ = c;        *q++ = c;
1442        }        }
1443      *q = 0;      *q = 0;
1444      len = q - dbuffer;      len = q - dbuffer;
1445    
1446        if ((all_use_dfa || use_dfa) && find_match_limit)
1447          {
1448          printf("**Match limit not relevant for DFA matching: ignored\n");
1449          find_match_limit = 0;
1450          }
1451    
1452      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1453      support timing. */      support timing or playing with the match limit or callout data. */
1454    
1455  #if !defined NOPOSIX  #if !defined NOPOSIX
1456      if (posix || do_posix)      if (posix || do_posix)
1457        {        {
1458        int rc;        int rc;
1459        int eflags = 0;        int eflags = 0;
1460        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];        regmatch_t *pmatch = NULL;
1461          if (use_size_offsets > 0)
1462            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1463        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1464        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1465    
1466        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1467    
1468        if (rc != 0)        if (rc != 0)
1469          {          {
1470          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1471          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1472          }          }
1473        else        else
1474          {          {
1475          size_t i;          size_t i;
1476          for (i = 0; i < size_offsets; i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1477            {            {
1478            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1479              {              {
1480              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1481              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1482                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1483              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1484              if (i == 0 && do_showrest)              if (i == 0 && do_showrest)
1485                {                {
1486                fprintf(outfile, " 0+ ");                fprintf(outfile, " 0+ ");
1487                pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo);                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1488                    outfile);
1489                fprintf(outfile, "\n");                fprintf(outfile, "\n");
1490                }                }
1491              }              }
1492            }            }
1493          }          }
1494          free(pmatch);
1495        }        }
1496    
1497      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
# Line 857  while (!done) Line 1506  while (!done)
1506          register int i;          register int i;
1507          clock_t time_taken;          clock_t time_taken;
1508          clock_t start_time = clock();          clock_t start_time = clock();
1509    
1510            if (all_use_dfa || use_dfa)
1511              {
1512              int workspace[1000];
1513              for (i = 0; i < LOOPREPEAT; i++)
1514                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1515                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1516                  sizeof(workspace)/sizeof(int));
1517              }
1518            else
1519    
1520          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1521            count = pcre_exec(re, extra, (char *)bptr, len,            count = pcre_exec(re, extra, (char *)bptr, len,
1522              start_offset, options | g_notempty, offsets, size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1523    
1524          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1525          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1526            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1527            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1528          }          }
1529    
1530        count = pcre_exec(re, extra, (char *)bptr, len,        /* If find_match_limit is set, we want to do repeated matches with
1531          start_offset, options | g_notempty, offsets, size_offsets);        varying limits in order to find the minimum value. */
1532    
1533        if (count == 0)        if (find_match_limit)
1534          {          {
1535          fprintf(outfile, "Matched, but too many substrings\n");          int min = 0;
1536          count = size_offsets/3;          int mid = 64;
1537            int max = -1;
1538    
1539            if (extra == NULL)
1540              {
1541              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1542              extra->flags = 0;
1543              }
1544            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1545    
1546            for (;;)
1547              {
1548              extra->match_limit = mid;
1549              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1550                options | g_notempty, use_offsets, use_size_offsets);
1551              if (count == PCRE_ERROR_MATCHLIMIT)
1552                {
1553                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1554                min = mid;
1555                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1556                }
1557              else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1558                                     count == PCRE_ERROR_PARTIAL)
1559                {
1560                if (mid == min + 1)
1561                  {
1562                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1563                  break;
1564                  }
1565                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1566                max = mid;
1567                mid = (min + mid)/2;
1568                }
1569              else break;    /* Some other error */
1570              }
1571    
1572            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1573            }
1574    
1575          /* If callout_data is set, use the interface with additional data */
1576    
1577          else if (callout_data_set)
1578            {
1579            if (extra == NULL)
1580              {
1581              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1582              extra->flags = 0;
1583              }
1584            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1585            extra->callout_data = &callout_data;
1586            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1587              options | g_notempty, use_offsets, use_size_offsets);
1588            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1589            }
1590    
1591          /* The normal case is just to do the match once, with the default
1592          value of match_limit. */
1593    
1594          else if (all_use_dfa || use_dfa)
1595            {
1596            int workspace[1000];
1597            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1598              options | g_notempty, use_offsets, use_size_offsets, workspace,
1599              sizeof(workspace)/sizeof(int));
1600            if (count == 0)
1601              {
1602              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1603              count = use_size_offsets/2;
1604              }
1605            }
1606    
1607          else
1608            {
1609            count = pcre_exec(re, extra, (char *)bptr, len,
1610              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1611            if (count == 0)
1612              {
1613              fprintf(outfile, "Matched, but too many substrings\n");
1614              count = use_size_offsets/3;
1615              }
1616          }          }
1617    
1618        /* Matched */        /* Matched */
# Line 882  while (!done) Line 1622  while (!done)
1622          int i;          int i;
1623          for (i = 0; i < count * 2; i += 2)          for (i = 0; i < count * 2; i += 2)
1624            {            {
1625            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1626              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1627            else            else
1628              {              {
1629              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1630              pchars(bptr + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1631                  use_offsets[i+1] - use_offsets[i], outfile);
1632              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1633              if (i == 0)              if (i == 0)
1634                {                {
1635                if (do_showrest)                if (do_showrest)
1636                  {                  {
1637                  fprintf(outfile, " 0+ ");                  fprintf(outfile, " 0+ ");
1638                  pchars(bptr + offsets[i+1], len - offsets[i+1]);                  (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1639                      outfile);
1640                  fprintf(outfile, "\n");                  fprintf(outfile, "\n");
1641                  }                  }
1642                }                }
# Line 906  while (!done) Line 1648  while (!done)
1648            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1649              {              {
1650              char copybuffer[16];              char copybuffer[16];
1651              int rc = pcre_copy_substring((char *)bptr, offsets, count,              int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1652                i, copybuffer, sizeof(copybuffer));                i, copybuffer, sizeof(copybuffer));
1653              if (rc < 0)              if (rc < 0)
1654                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
# Line 920  while (!done) Line 1662  while (!done)
1662            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1663              {              {
1664              const char *substring;              const char *substring;
1665              int rc = pcre_get_substring((char *)bptr, offsets, count,              int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1666                i, &substring);                i, &substring);
1667              if (rc < 0)              if (rc < 0)
1668                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
1669              else              else
1670                {                {
1671                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1672                free((void *)substring);                /* free((void *)substring); */
1673                  pcre_free_substring(substring);
1674                }                }
1675              }              }
1676            }            }
# Line 935  while (!done) Line 1678  while (!done)
1678          if (getlist)          if (getlist)
1679            {            {
1680            const char **stringlist;            const char **stringlist;
1681            int rc = pcre_get_substring_list((char *)bptr, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1682              &stringlist);              &stringlist);
1683            if (rc < 0)            if (rc < 0)
1684              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 945  while (!done) Line 1688  while (!done)
1688                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1689              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1690                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1691              free((void *)stringlist);              /* free((void *)stringlist); */
1692                pcre_free_substring_list(stringlist);
1693              }              }
1694            }            }
1695          }          }
1696    
1697          /* There was a partial match */
1698    
1699          else if (count == PCRE_ERROR_PARTIAL)
1700            {
1701            fprintf(outfile, "Partial match");
1702            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1703              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1704                bptr + use_offsets[0]);
1705            fprintf(outfile, "\n");
1706            break;  /* Out of the /g loop */
1707            }
1708    
1709        /* Failed to match. If this is a /g or /G loop and we previously set        /* Failed to match. If this is a /g or /G loop and we previously set
1710        PCRE_NOTEMPTY after a null match, this is not necessarily the end.        g_notempty after a null match, this is not necessarily the end.
1711        We want to advance the start offset, and continue. Fudge the offset        We want to advance the start offset, and continue. In the case of UTF-8
1712        values to achieve this. We won't be at the end of the string - that        matching, the advance must be one character, not one byte. Fudge the
1713        was checked before setting PCRE_NOTEMPTY. */        offset values to achieve this. We won't be at the end of the string -
1714          that was checked before setting g_notempty. */
1715    
1716        else        else
1717          {          {
1718          if (g_notempty != 0)          if (g_notempty != 0)
1719            {            {
1720            offsets[0] = start_offset;            int onechar = 1;
1721            offsets[1] = start_offset + 1;            use_offsets[0] = start_offset;
1722              if (use_utf8)
1723                {
1724                while (start_offset + onechar < len)
1725                  {
1726                  int tb = bptr[start_offset+onechar];
1727                  if (tb <= 127) break;
1728                  tb &= 0xc0;
1729                  if (tb != 0 && tb != 0xc0) onechar++;
1730                  }
1731                }
1732              use_offsets[1] = start_offset + onechar;
1733            }            }
1734          else          else
1735            {            {
1736            if (gmatched == 0)   /* Error if no previous matches */            if (count == PCRE_ERROR_NOMATCH)
1737              {              {
1738              if (count == -1) fprintf(outfile, "No match\n");              if (gmatched == 0) fprintf(outfile, "No match\n");
               else fprintf(outfile, "Error %d\n", count);  
1739              }              }
1740              else fprintf(outfile, "Error %d\n", count);
1741            break;  /* Out of the /g loop */            break;  /* Out of the /g loop */
1742            }            }
1743          }          }
# Line 981  while (!done) Line 1749  while (!done)
1749        /* If we have matched an empty string, first check to see if we are at        /* If we have matched an empty string, first check to see if we are at
1750        the end of the subject. If so, the /g loop is over. Otherwise, mimic        the end of the subject. If so, the /g loop is over. Otherwise, mimic
1751        what Perl's /g options does. This turns out to be rather cunning. First        what Perl's /g options does. This turns out to be rather cunning. First
1752        we set PCRE_NOTEMPTY and try the match again at the same point. If this        we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1753        fails (picked up above) we advance to the next character. */        same point. If this fails (picked up above) we advance to the next
1754          character. */
1755    
1756        g_notempty = 0;        g_notempty = 0;
1757        if (offsets[0] == offsets[1])        if (use_offsets[0] == use_offsets[1])
1758          {          {
1759          if (offsets[0] == len) break;          if (use_offsets[0] == len) break;
1760          g_notempty = PCRE_NOTEMPTY;          g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1761          }          }
1762    
1763        /* For /g, update the start offset, leaving the rest alone */        /* For /g, update the start offset, leaving the rest alone */
1764    
1765        if (do_g) start_offset = offsets[1];        if (do_g) start_offset = use_offsets[1];
1766    
1767        /* For /G, update the pointer and length */        /* For /G, update the pointer and length */
1768    
1769        else        else
1770          {          {
1771          bptr += offsets[1];          bptr += use_offsets[1];
1772          len -= offsets[1];          len -= use_offsets[1];
1773          }          }
1774        }  /* End of loop for /g and /G */        }  /* End of loop for /g and /G */
1775      }    /* End of loop for data lines */      }    /* End of loop for data lines */
# Line 1011  while (!done) Line 1780  while (!done)
1780    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1781  #endif  #endif
1782    
1783    if (re != NULL) free(re);    if (re != NULL) new_free(re);
1784    if (extra != NULL) free(extra);    if (extra != NULL) new_free(extra);
1785    if (tables != NULL)    if (tables != NULL)
1786      {      {
1787      free((void *)tables);      new_free((void *)tables);
1788      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1789      }      }
1790    }    }
1791    
1792  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1793  return 0;  
1794    EXIT:
1795    
1796    if (infile != NULL && infile != stdin) fclose(infile);
1797    if (outfile != NULL && outfile != stdout) fclose(outfile);
1798    
1799    free(buffer);
1800    free(dbuffer);
1801    free(pbuffer);
1802    free(offsets);
1803    
1804    return yield;
1805  }  }
1806    
1807  /* End */  /* End of pcretest.c */

Legend:
Removed from v.41  
changed lines
  Added in v.77

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12