/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 13 by nigel, Sat Feb 24 21:38:21 2007 UTC revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44    #include <locale.h>
45    #include <errno.h>
46    
47    #define PCRE_SPY        /* For Win32 build, import data, not export */
48    
49    /* We need the internal info for displaying the results of pcre_study() and
50    other internal data; pcretest also uses some of the fixed tables, and generally
51    has "inside information" compared to a program that strictly follows the PCRE
52    API. */
53    
54  /* Use the internal info for displaying the results of pcre_study(). */  #include "pcre_internal.h"
55    
56  #include "internal.h"  
57    /* It is possible to compile this test program without including support for
58    testing the POSIX interface, though this is not available via the standard
59    Makefile. */
60    
61    #if !defined NOPOSIX
62  #include "pcreposix.h"  #include "pcreposix.h"
63    #endif
64    
65  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
66  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 70 
70  #endif  #endif
71  #endif  #endif
72    
73    #define LOOPREPEAT 500000
74    
75    #define BUFFER_SIZE 30000
76    #define PBUFFER_SIZE BUFFER_SIZE
77    #define DBUFFER_SIZE BUFFER_SIZE
78    
79    
80  static FILE *outfile;  static FILE *outfile;
81  static int log_store = 0;  static int log_store = 0;
82    static int callout_count;
83    static int callout_extra;
84    static int callout_fail_count;
85    static int callout_fail_id;
86    static int first_callout;
87    static int show_malloc;
88    static int use_utf8;
89    static size_t gotten_store;
90    
91    static uschar *pbuffer = NULL;
92    
93    
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
94    
95  static const char *OP_names[] = {  /*************************************************
96    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  *          Read number from string               *
97    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  *************************************************/
   "not",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  
   "*", "*?", "+", "+?", "?", "??", "{", "{",  
   "class", "negclass", "Ref",  
   "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  
   "Brazero", "Braminzero", "Bra"  
 };  
   
   
 static void print_internals(pcre *re)  
 {  
 unsigned char *code = ((real_pcre *)re)->code;  
   
 printf("------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   printf("%3d ", code - ((real_pcre *)re)->code);  
   
   if (*code >= OP_BRA)  
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     printf("    %s\n", OP_names[*code]);  
     printf("------------------------------------------------------------------\n");  
     return;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf("0,");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     printf("    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
     case OP_NEGCLASS:  
       {  
       int i, min, max;  
       if (*code++ == OP_CLASS) printf("    [");  
         else printf("   ^[");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
98    
99          case OP_CRRANGE:  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
100          case OP_CRMINRANGE:  around with conditional compilation, just do the job by hand. It is only used
101          min = (code[1] << 8) + code[2];  for unpicking the -o argument, so just keep it simple.
102          max = (code[3] << 8) + code[4];  
103          if (max == 0) printf("{%d,}", min);  Arguments:
104          else printf("{%d,%d}", min, max);    str           string to be converted
105          if (*code == OP_CRMINRANGE) printf("?");    endptr        where to put the end pointer
         code += 4;  
         break;  
106    
107          default:  Returns:        the converted value, as an int
108          code--;  */
         }  
       }  
     break;  
109    
static int
get_value(unsigned char *str, unsigned char **endptr)
{
int total = 0;
unsigned char *cursor = str;

/* Step over any leading white space, stopping at the terminating zero. */

for (;;)
  {
  if (*cursor == 0 || !isspace(*cursor)) break;
  cursor++;
  }

/* Accumulate consecutive decimal digits; there is no overflow check. */

for (; isdigit(*cursor); cursor++)
  total = total * 10 + (int)(*cursor - '0');

/* Tell the caller where conversion stopped. If no digit was seen this is the
first non-space character and the result is zero. */

*endptr = cursor;
return total;
}
119    
     default:  
     printf("    %s", OP_names[*code]);  
     break;  
     }  
120    
121    code++;  
122    printf("\n");  
123    /*************************************************
124    *            Convert UTF-8 string to value       *
125    *************************************************/
126    
127    /* This function takes one or more bytes that represents a UTF-8 character,
128    and returns the value of the character.
129    
130    Argument:
131      buffer   a pointer to the byte vector
132      vptr     a pointer to an int to receive the value
133    
134    Returns:   >  0 => the number of bytes consumed
135               -6 to 0 => malformed UTF-8 character at offset = (-return)
136    */
137    
138    static int
139    utf82ord(unsigned char *buffer, int *vptr)
140    {
141    int c = *buffer++;
142    int d = c;
143    int i, j, s;
144    
145    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
146      {
147      if ((d & 0x80) == 0) break;
148      d <<= 1;
149      }
150    
151    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
152    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
153    
154    /* i now has a value in the range 1-5 */
155    
156    s = 6*i;
157    d = (c & _pcre_utf8_table3[i]) << s;
158    
159    for (j = 0; j < i; j++)
160      {
161      c = *buffer++;
162      if ((c & 0xc0) != 0x80) return -(j+1);
163      s -= 6;
164      d |= (c & 0x3f) << s;
165    }    }
166    
167    /* Check that encoding was the correct unique one */
168    
169    for (j = 0; j < _pcre_utf8_table1_size; j++)
170      if (d <= _pcre_utf8_table1[j]) break;
171    if (j != i) return -(i+1);
172    
173    /* Valid value */
174    
175    *vptr = d;
176    return i+1;
177  }  }
178    
179    
180    
181  /* Character string printing function. */  /*************************************************
182    *             Print character string             *
183    *************************************************/
184    
185  static void pchars(unsigned char *p, int length)  /* Character string printing function. Must handle UTF-8 strings in utf8
186    mode. Yields number of characters printed. If handed a NULL file, just counts
187    chars without printing. */
188    
189    static int pchars(unsigned char *p, int length, FILE *f)
190  {  {
191  int c;  int c;
192    int yield = 0;
193    
194  while (length-- > 0)  while (length-- > 0)
195    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
196      else fprintf(outfile, "\\x%02x", c);    if (use_utf8)
197        {
198        int rc = utf82ord(p, &c);
199    
200        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
201          {
202          length -= rc - 1;
203          p += rc;
204          if (c < 256 && isprint(c))
205            {
206            if (f != NULL) fprintf(f, "%c", c);
207            yield++;
208            }
209          else
210            {
211            int n;
212            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
213            yield += n;
214            }
215          continue;
216          }
217        }
218    
219       /* Not UTF-8, or malformed UTF-8  */
220    
221      if (isprint(c = *(p++)))
222        {
223        if (f != NULL) fprintf(f, "%c", c);
224        yield++;
225        }
226      else
227        {
228        if (f != NULL) fprintf(f, "\\x%02x", c);
229        yield += 4;
230        }
231      }
232    
233    return yield;
234    }
235    
236    
237    
238    /*************************************************
239    *              Callout function                  *
240    *************************************************/
241    
242    /* Called from PCRE as a result of the (?C) item. We print out where we are in
243    the match. Yield zero unless more callouts than the fail count, or the callout
244    data is not zero. */
245    
246    static int callout(pcre_callout_block *cb)
247    {
248    FILE *f = (first_callout | callout_extra)? outfile : NULL;
249    int i, pre_start, post_start, subject_length;
250    
251    if (callout_extra)
252      {
253      fprintf(f, "Callout %d: last capture = %d\n",
254        cb->callout_number, cb->capture_last);
255    
256      for (i = 0; i < cb->capture_top * 2; i += 2)
257        {
258        if (cb->offset_vector[i] < 0)
259          fprintf(f, "%2d: <unset>\n", i/2);
260        else
261          {
262          fprintf(f, "%2d: ", i/2);
263          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
264            cb->offset_vector[i+1] - cb->offset_vector[i], f);
265          fprintf(f, "\n");
266          }
267        }
268      }
269    
270    /* Re-print the subject in canonical form, the first time or if giving full
271    datails. On subsequent calls in the same match, we use pchars just to find the
272    printed lengths of the substrings. */
273    
274    if (f != NULL) fprintf(f, "--->");
275    
276    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
277    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
278      cb->current_position - cb->start_match, f);
279    
280    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
281    
282    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
283      cb->subject_length - cb->current_position, f);
284    
285    if (f != NULL) fprintf(f, "\n");
286    
287    /* Always print appropriate indicators, with callout number if not already
288    shown. For automatic callouts, show the pattern offset. */
289    
290    if (cb->callout_number == 255)
291      {
292      fprintf(outfile, "%+3d ", cb->pattern_position);
293      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
294      }
295    else
296      {
297      if (callout_extra) fprintf(outfile, "    ");
298        else fprintf(outfile, "%3d ", cb->callout_number);
299      }
300    
301    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
302    fprintf(outfile, "^");
303    
304    if (post_start > 0)
305      {
306      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
307      fprintf(outfile, "^");
308      }
309    
310    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
311      fprintf(outfile, " ");
312    
313    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
314      pbuffer + cb->pattern_position);
315    
316    fprintf(outfile, "\n");
317    first_callout = 0;
318    
319    if (cb->callout_data != NULL)
320      {
321      int callout_data = *((int *)(cb->callout_data));
322      if (callout_data != 0)
323        {
324        fprintf(outfile, "Callout data = %d\n", callout_data);
325        return callout_data;
326        }
327      }
328    
329    return (cb->callout_number != callout_fail_id)? 0 :
330           (++callout_count >= callout_fail_count)? 1 : 0;
331  }  }
332    
333    
334    /*************************************************
335    *            Local malloc functions              *
336    *************************************************/
337    
338  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
339  compiled re. */  compiled re. */
340    
341  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
342  {  {
343  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
344  return malloc(size);  gotten_store = size;
345    if (show_malloc)
346      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
347    return block;
348    }
349    
350    static void new_free(void *block)
351    {
352    if (show_malloc)
353      fprintf(outfile, "free             %p\n", block);
354    free(block);
355    }
356    
357    
358    /* For recursion malloc/free, to test stacking calls */
359    
360    static void *stack_malloc(size_t size)
361    {
362    void *block = malloc(size);
363    if (show_malloc)
364      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
365    return block;
366    }
367    
368    static void stack_free(void *block)
369    {
370    if (show_malloc)
371      fprintf(outfile, "stack_free       %p\n", block);
372    free(block);
373    }
374    
375    
376    /*************************************************
377    *          Call pcre_fullinfo()                  *
378    *************************************************/
379    
380    /* Get one piece of information from the pcre_fullinfo() function */
381    
382    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
383    {
384    int rc;
385    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
386      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
387  }  }
388    
389    
390    
391    /*************************************************
392    *         Byte flipping function                 *
393    *************************************************/
394    
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value    the value to be flipped; only the low-order 2 or 4 bytes are used
  n        2 to swap the two low-order bytes; any other value reverses the
             four low-order bytes

Returns:   the flipped value

The work is done in unsigned arithmetic. Shifting a signed long so that a one
bit reaches the sign position is undefined where long is 32 bits wide, which
is what used to happen for a low-order byte of 0x80 or more. */

static long int
byteflip(long int value, int n)
{
unsigned long int v = (unsigned long int)value;

if (n == 2)
  return (long int)(((v & 0x00ffUL) << 8) | ((v & 0xff00UL) >> 8));

return (long int)(((v & 0x000000ffUL) << 24) |
                  ((v & 0x0000ff00UL) <<  8) |
                  ((v & 0x00ff0000UL) >>  8) |
                  ((v & 0xff000000UL) >> 24));
}
404    
405    
406    
407    
408    /*************************************************
409    *                Main Program                    *
410    *************************************************/
411    
412  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
413  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
414  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 266  int study_options = 0; Line 421  int study_options = 0;
421  int op = 1;  int op = 1;
422  int timeit = 0;  int timeit = 0;
423  int showinfo = 0;  int showinfo = 0;
424    int showstore = 0;
425    int size_offsets = 45;
426    int size_offsets_max;
427    int *offsets = NULL;
428    #if !defined NOPOSIX
429  int posix = 0;  int posix = 0;
430    #endif
431  int debug = 0;  int debug = 0;
432  int done = 0;  int done = 0;
433  unsigned char buffer[30000];  int all_use_dfa = 0;
434  unsigned char dbuffer[1024];  int yield = 0;
435    
436    unsigned char *buffer;
437    unsigned char *dbuffer;
438    
439    /* Get buffers from malloc() so that Electric Fence will check their misuse
440    when I am debugging. */
441    
442  /* Static so that new_malloc can use it. */  buffer = (unsigned char *)malloc(BUFFER_SIZE);
443    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
444    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
445    
446    /* The outfile variable is static so that new_malloc can use it. The _setmode()
447    stuff is some magic that I don't understand, but which apparently does good
448    things in Windows. It's related to line terminations.  */
449    
450    #if defined(_WIN32) || defined(WIN32)
451    _setmode( _fileno( stdout ), 0x8000 );
452    #endif  /* defined(_WIN32) || defined(WIN32) */
453    
454  outfile = stdout;  outfile = stdout;
455    
# Line 280  outfile = stdout; Line 457  outfile = stdout;
457    
458  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
459    {    {
460    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
461    
462      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
463        showstore = 1;
464    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
465    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
466    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
467      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
468      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
469          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
470            *endptr == 0))
471        {
472        op++;
473        argc--;
474        }
475    #if !defined NOPOSIX
476    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
477    #endif
478      else if (strcmp(argv[op], "-C") == 0)
479        {
480        int rc;
481        printf("PCRE version %s\n", pcre_version());
482        printf("Compiled with\n");
483        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
484        printf("  %sUTF-8 support\n", rc? "" : "No ");
485        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
486        printf("  %sUnicode properties support\n", rc? "" : "No ");
487        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
488        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
489        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
490        printf("  Internal link size = %d\n", rc);
491        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
492        printf("  POSIX malloc threshold = %d\n", rc);
493        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
494        printf("  Default match limit = %d\n", rc);
495        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
496        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
497        exit(0);
498        }
499    else    else
500      {      {
501      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
502      return 1;      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
503        printf("  -C     show PCRE compile-time options and exit\n");
504        printf("  -d     debug: show compiled code; implies -i\n");
505        printf("  -dfa   force DFA matching for all subjects\n");
506        printf("  -i     show information about compiled pattern\n"
507               "  -m     output memory used information\n"
508               "  -o <n> set size of offsets vector to <n>\n");
509    #if !defined NOPOSIX
510        printf("  -p     use POSIX interface\n");
511    #endif
512        printf("  -s     output store (memory) used information\n"
513               "  -t     time compilation and execution\n");
514        yield = 1;
515        goto EXIT;
516      }      }
517    op++;    op++;
518    argc--;    argc--;
519    }    }
520    
521    /* Get the store for the offsets vector, and remember what it was */
522    
523    size_offsets_max = size_offsets;
524    offsets = (int *)malloc(size_offsets_max * sizeof(int));
525    if (offsets == NULL)
526      {
527      printf("** Failed to get %d bytes of memory for offsets vector\n",
528        size_offsets_max * sizeof(int));
529      yield = 1;
530      goto EXIT;
531      }
532    
533  /* Sort out the input and output files */  /* Sort out the input and output files */
534    
535  if (argc > 1)  if (argc > 1)
536    {    {
537    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
538    if (infile == NULL)    if (infile == NULL)
539      {      {
540      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
541      return 1;      yield = 1;
542        goto EXIT;
543      }      }
544    }    }
545    
546  if (argc > 2)  if (argc > 2)
547    {    {
548    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
549    if (outfile == NULL)    if (outfile == NULL)
550      {      {
551      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
552      return 1;      yield = 1;
553        goto EXIT;
554      }      }
555    }    }
556    
557  /* Set alternative malloc function */  /* Set alternative malloc function */
558    
559  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
560    pcre_free = new_free;
561    pcre_stack_malloc = stack_malloc;
562    pcre_stack_free = stack_free;
563    
564  /* Heading line, then prompt for first re if stdin */  /* Heading line, then prompt for first regex if stdin */
565    
 fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  
566  fprintf(outfile, "PCRE version %s\n\n", pcre_version());  fprintf(outfile, "PCRE version %s\n\n", pcre_version());
567    
568  /* Main loop */  /* Main loop */
# Line 331  while (!done) Line 571  while (!done)
571    {    {
572    pcre *re = NULL;    pcre *re = NULL;
573    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
574    
575    #if !defined NOPOSIX  /* There are still compilers that require no indent */
576    regex_t preg;    regex_t preg;
577      int do_posix = 0;
578    #endif
579    
580    const char *error;    const char *error;
581    unsigned char *p, *pp;    unsigned char *p, *pp, *ppp;
582      unsigned char *to_file = NULL;
583      const unsigned char *tables = NULL;
584      unsigned long int true_size, true_study_size = 0;
585      size_t size, regex_gotten_store;
586    int do_study = 0;    int do_study = 0;
587    int do_debug = 0;    int do_debug = debug;
588    int do_posix = 0;    int do_G = 0;
589      int do_g = 0;
590      int do_showinfo = showinfo;
591      int do_showrest = 0;
592      int do_flip = 0;
593    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
594    
595      use_utf8 = 0;
596    
597    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
598    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
599    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
600      fflush(outfile);
601    
602    p = buffer;    p = buffer;
603    while (isspace(*p)) p++;    while (isspace(*p)) p++;
604    if (*p == 0) continue;    if (*p == 0) continue;
605    
606    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
607    complete, read more. */  
608      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
609        {
610        unsigned long int magic;
611        uschar sbuf[8];
612        FILE *f;
613    
614        p++;
615        pp = p + (int)strlen((char *)p);
616        while (isspace(pp[-1])) pp--;
617        *pp = 0;
618    
619        f = fopen((char *)p, "rb");
620        if (f == NULL)
621          {
622          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
623          continue;
624          }
625    
626        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
627    
628        true_size =
629          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
630        true_study_size =
631          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
632    
633        re = (real_pcre *)new_malloc(true_size);
634        regex_gotten_store = gotten_store;
635    
636        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
637    
638        magic = ((real_pcre *)re)->magic_number;
639        if (magic != MAGIC_NUMBER)
640          {
641          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
642            {
643            do_flip = 1;
644            }
645          else
646            {
647            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
648            fclose(f);
649            continue;
650            }
651          }
652    
653        fprintf(outfile, "Compiled regex%s loaded from %s\n",
654          do_flip? " (byte-inverted)" : "", p);
655    
656        /* Need to know if UTF-8 for printing data strings */
657    
658        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
659        use_utf8 = (options & PCRE_UTF8) != 0;
660    
661        /* Now see if there is any following study data */
662    
663        if (true_study_size != 0)
664          {
665          pcre_study_data *psd;
666    
667          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
668          extra->flags = PCRE_EXTRA_STUDY_DATA;
669    
670          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
671          extra->study_data = psd;
672    
673          if (fread(psd, 1, true_study_size, f) != true_study_size)
674            {
675            FAIL_READ:
676            fprintf(outfile, "Failed to read data from %s\n", p);
677            if (extra != NULL) new_free(extra);
678            if (re != NULL) new_free(re);
679            fclose(f);
680            continue;
681            }
682          fprintf(outfile, "Study data loaded from %s\n", p);
683          do_study = 1;     /* To get the data output if requested */
684          }
685        else fprintf(outfile, "No study data\n");
686    
687        fclose(f);
688        goto SHOW_INFO;
689        }
690    
691      /* In-line pattern (the usual case). Get the delimiter and seek the end of
692      the pattern; if it isn't complete, read more. */
693    
694    delimiter = *p++;    delimiter = *p++;
695    
696    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
697      {      {
698      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
699      goto SKIP_DATA;      goto SKIP_DATA;
700      }      }
701    
# Line 362  while (!done) Line 703  while (!done)
703    
704    for(;;)    for(;;)
705      {      {
706      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
707          {
708          if (*pp == '\\' && pp[1] != 0) pp++;
709            else if (*pp == delimiter) break;
710          pp++;
711          }
712      if (*pp != 0) break;      if (*pp != 0) break;
713    
714      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
715      if (len < 256)      if (len < 256)
716        {        {
717        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 379  while (!done) Line 725  while (!done)
725        done = 1;        done = 1;
726        goto CONTINUE;        goto CONTINUE;
727        }        }
728      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
729      }      }
730    
731    /* Terminate the pattern at the delimiter */    /* If the first character after the delimiter is backslash, make
732      the pattern end with backslash. This is purely to provide a way
733      of testing for the error message when a pattern ends with backslash. */
734    
735      if (pp[1] == '\\') *pp++ = '\\';
736    
737      /* Terminate the pattern at the delimiter, and save a copy of the pattern
738      for callouts. */
739    
740    *pp++ = 0;    *pp++ = 0;
741      strcpy((char *)pbuffer, (char *)p);
742    
743    /* Look for options after final delimiter */    /* Look for options after final delimiter */
744    
745    options = 0;    options = 0;
746    study_options = 0;    study_options = 0;
747      log_store = showstore;  /* default from command line */
748    
749    while (*pp != 0)    while (*pp != 0)
750      {      {
751      switch (*pp++)      switch (*pp++)
752        {        {
753          case 'f': options |= PCRE_FIRSTLINE; break;
754          case 'g': do_g = 1; break;
755        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
756        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
757        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
758        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
759    
760          case '+': do_showrest = 1; break;
761        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
762        case 'D': do_debug = 1; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
763          case 'D': do_debug = do_showinfo = 1; break;
764        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
765          case 'F': do_flip = 1; break;
766          case 'G': do_G = 1; break;
767          case 'I': do_showinfo = 1; break;
768          case 'M': log_store = 1; break;
769          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
770    
771    #if !defined NOPOSIX
772        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
773    #endif
774    
775        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
776        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
777        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
778        case '\n': case ' ': break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
779          case '?': options |= PCRE_NO_UTF8_CHECK; break;
780    
781          case 'L':
782          ppp = pp;
783          /* The '\r' test here is so that it works on Windows */
784          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
785          *ppp = 0;
786          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
787            {
788            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
789            goto SKIP_DATA;
790            }
791          tables = pcre_maketables();
792          pp = ppp;
793          break;
794    
795          case '>':
796          to_file = pp;
797          while (*pp != 0) pp++;
798          while (isspace(pp[-1])) pp--;
799          *pp = 0;
800          break;
801    
802          case '\r':                      /* So that it works in Windows */
803          case '\n':
804          case ' ':
805          break;
806    
807        default:        default:
808        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
809        goto SKIP_DATA;        goto SKIP_DATA;
# Line 413  while (!done) Line 811  while (!done)
811      }      }
812    
813    /* Handle compiling via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
814    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
815      local character tables. */
816    
817    #if !defined NOPOSIX
818    if (posix || do_posix)    if (posix || do_posix)
819      {      {
820      int rc;      int rc;
821      int cflags = 0;      int cflags = 0;
822    
823      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
824      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
825        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
826      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
827    
828      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 428  while (!done) Line 830  while (!done)
830    
831      if (rc != 0)      if (rc != 0)
832        {        {
833        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
834        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
835        goto SKIP_DATA;        goto SKIP_DATA;
836        }        }
# Line 437  while (!done) Line 839  while (!done)
839    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
840    
841    else    else
842    #endif  /* !defined NOPOSIX */
843    
844      {      {
845      if (timeit)      if (timeit)
846        {        {
847        register int i;        register int i;
848        clock_t time_taken;        clock_t time_taken;
849        clock_t start_time = clock();        clock_t start_time = clock();
850        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
851          {          {
852          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
853          if (re != NULL) free(re);          if (re != NULL) free(re);
854          }          }
855        time_taken = clock() - start_time;        time_taken = clock() - start_time;
856        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
857          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
858              (double)CLOCKS_PER_SEC);
859        }        }
860    
861      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
862    
863      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
864      if non-interactive. */      if non-interactive. */
# Line 466  while (!done) Line 871  while (!done)
871          {          {
872          for (;;)          for (;;)
873            {            {
874            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
875              {              {
876              done = 1;              done = 1;
877              goto CONTINUE;              goto CONTINUE;
# Line 477  while (!done) Line 882  while (!done)
882            }            }
883          fprintf(outfile, "\n");          fprintf(outfile, "\n");
884          }          }
885        continue;        goto CONTINUE;
886        }        }
887    
888      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
889        info-returning functions. The old one has a limited interface and
890        returns only limited data. Check that it agrees with the newer one. */
891    
892        if (log_store)
893          fprintf(outfile, "Memory allocation (code space): %d\n",
894            (int)(gotten_store -
895                  sizeof(real_pcre) -
896                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
897    
898      if (showinfo || do_debug)      /* Extract the size for possible writing before possibly flipping it,
899        {      and remember the store that was got. */
       int first_char, count;  
900    
901        if (debug || do_debug) print_internals(re);      true_size = ((real_pcre *)re)->size;
902        regex_gotten_store = gotten_store;
       count = pcre_info(re, &options, &first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d while reading info\n", count);  
       else  
         {  
         fprintf(outfile, "Identifying subpattern count = %d\n", count);  
         if (options == 0) fprintf(outfile, "No options\n");  
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
         else  
           {  
           if (isprint(first_char))  
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
         }  
       }  
903    
904      /* If /S was present, study the regexp to generate additional info to      /* If /S was present, study the regexp to generate additional info to
905      help with the matching. */      help with the matching. */
# Line 531  while (!done) Line 911  while (!done)
911          register int i;          register int i;
912          clock_t time_taken;          clock_t time_taken;
913          clock_t start_time = clock();          clock_t start_time = clock();
914          for (i = 0; i < 4000; i++)          for (i = 0; i < LOOPREPEAT; i++)
915            extra = pcre_study(re, study_options, &error);            extra = pcre_study(re, study_options, &error);
916          time_taken = clock() - start_time;          time_taken = clock() - start_time;
917          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
918          fprintf(outfile, "  Study time %.2f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
919            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
920                (double)CLOCKS_PER_SEC);
921          }          }
   
922        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
923        if (error != NULL)        if (error != NULL)
924          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
925        else if (extra == NULL)        else if (extra != NULL)
926          fprintf(outfile, "Study returned NULL\n");          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
927          }
928    
929        /* If the 'F' option was present, we flip the bytes of all the integer
930        fields in the regex data block and the study block. This is to make it
931        possible to test PCRE's handling of byte-flipped patterns, e.g. those
932        compiled on a different architecture. */
933    
934        if (do_flip)
935          {
936          real_pcre *rre = (real_pcre *)re;
937          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
938          rre->size = byteflip(rre->size, sizeof(rre->size));
939          rre->options = byteflip(rre->options, sizeof(rre->options));
940          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
941          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
942          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
943          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
944          rre->name_table_offset = byteflip(rre->name_table_offset,
945            sizeof(rre->name_table_offset));
946          rre->name_entry_size = byteflip(rre->name_entry_size,
947            sizeof(rre->name_entry_size));
948          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
949    
950        /* This looks at internal information. A bit kludgy to do it this        if (extra != NULL)
951        way, but it is useful for testing. */          {
952            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
953            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
954            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
955            }
956          }
957    
958        /* Extract information from the compiled data if required */
959    
960        SHOW_INFO:
961    
962        if (do_showinfo)
963          {
964          unsigned long int get_options, all_options;
965          int old_first_char, old_options, old_count;
966          int count, backrefmax, first_char, need_char;
967          int nameentrysize, namecount;
968          const uschar *nametable;
969    
970        else if (showinfo || do_debug)        if (do_debug)
971          {          {
972          real_pcre_extra *xx = (real_pcre_extra *)extra;          fprintf(outfile, "------------------------------------------------------------------\n");
973          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          _pcre_printint(re, outfile);
974            fprintf(outfile, "No starting character set\n");          }
975    
976          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
977          new_info(re, NULL, PCRE_INFO_SIZE, &size);
978          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
979          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
980          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
981          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
982          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
983          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
984          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
985    
986          old_count = pcre_info(re, &old_options, &old_first_char);
987          if (count < 0) fprintf(outfile,
988            "Error %d from pcre_info()\n", count);
989          else
990            {
991            if (old_count != count) fprintf(outfile,
992              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
993                old_count);
994    
995            if (old_first_char != first_char) fprintf(outfile,
996              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
997                first_char, old_first_char);
998    
999            if (old_options != (int)get_options) fprintf(outfile,
1000              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1001                get_options, old_options);
1002            }
1003    
1004          if (size != regex_gotten_store) fprintf(outfile,
1005            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1006            (int)size, (int)regex_gotten_store);
1007    
1008          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1009          if (backrefmax > 0)
1010            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1011    
1012          if (namecount > 0)
1013            {
1014            fprintf(outfile, "Named capturing subpatterns:\n");
1015            while (namecount-- > 0)
1016              {
1017              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1018                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1019                GET2(nametable, 0));
1020              nametable += nameentrysize;
1021              }
1022            }
1023    
1024          /* The NOPARTIAL bit is a private bit in the options, so we have
1025          to fish it out via our back door */
1026    
1027          all_options = ((real_pcre *)re)->options;
1028          if (do_flip)
1029            {
1030            all_options = byteflip(all_options, sizeof(all_options));
1031            }
1032    
1033          if ((all_options & PCRE_NOPARTIAL) != 0)
1034            fprintf(outfile, "Partial matching not supported\n");
1035    
1036          if (get_options == 0) fprintf(outfile, "No options\n");
1037            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1038              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1039              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1040              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1041              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1042              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1043              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1044              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1045              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1046              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1047              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1048              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1049    
1050          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1051            fprintf(outfile, "Case state changes\n");
1052    
1053          if (first_char == -1)
1054            {
1055            fprintf(outfile, "First char at start or follows \\n\n");
1056            }
1057          else if (first_char < 0)
1058            {
1059            fprintf(outfile, "No first char\n");
1060            }
1061          else
1062            {
1063            int ch = first_char & 255;
1064            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1065              "" : " (caseless)";
1066            if (isprint(ch))
1067              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1068            else
1069              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1070            }
1071    
1072          if (need_char < 0)
1073            {
1074            fprintf(outfile, "No need char\n");
1075            }
1076          else
1077            {
1078            int ch = need_char & 255;
1079            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1080              "" : " (caseless)";
1081            if (isprint(ch))
1082              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1083            else
1084              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1085            }
1086    
1087          /* Don't output study size; at present it is in any case a fixed
1088          value, but it varies, depending on the computer architecture, and
1089          so messes up the test suite. (And with the /F option, it might be
1090          flipped.) */
1091    
1092          if (do_study)
1093            {
1094            if (extra == NULL)
1095              fprintf(outfile, "Study returned NULL\n");
1096          else          else
1097            {            {
1098            int i;            uschar *start_bits = NULL;
1099            int c = 24;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1100            fprintf(outfile, "Starting character set: ");  
1101            for (i = 0; i < 256; i++)            if (start_bits == NULL)
1102                fprintf(outfile, "No starting byte set\n");
1103              else
1104              {              {
1105              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              int i;
1106                int c = 24;
1107                fprintf(outfile, "Starting byte set: ");
1108                for (i = 0; i < 256; i++)
1109                {                {
1110                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
1111                  {                  {
1112                  fprintf(outfile, "\n  ");                  if (c > 75)
1113                  c = 2;                    {
1114                  }                    fprintf(outfile, "\n  ");
1115                if (isprint(i) && i != ' ')                    c = 2;
1116                  {                    }
1117                  fprintf(outfile, "%c ", i);                  if (isprint(i) && i != ' ')
1118                  c += 2;                    {
1119                  }                    fprintf(outfile, "%c ", i);
1120                else                    c += 2;
1121                  {                    }
1122                  fprintf(outfile, "\\x%02x ", i);                  else
1123                  c += 5;                    {
1124                      fprintf(outfile, "\\x%02x ", i);
1125                      c += 5;
1126                      }
1127                  }                  }
1128                }                }
1129                fprintf(outfile, "\n");
1130              }              }
           fprintf(outfile, "\n");  
1131            }            }
1132          }          }
1133        }        }
1134      }  
1135        /* If the '>' option was present, we write out the regex to a file, and
1136        that is all. The first 8 bytes of the file are the regex length and then
1137        the study length, in big-endian order. */
1138    
1139        if (to_file != NULL)
1140          {
1141          FILE *f = fopen((char *)to_file, "wb");
1142          if (f == NULL)
1143            {
1144            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1145            }
1146          else
1147            {
1148            uschar sbuf[8];
1149            sbuf[0] = (true_size >> 24)  & 255;
1150            sbuf[1] = (true_size >> 16)  & 255;
1151            sbuf[2] = (true_size >>  8)  & 255;
1152            sbuf[3] = (true_size)  & 255;
1153    
1154            sbuf[4] = (true_study_size >> 24)  & 255;
1155            sbuf[5] = (true_study_size >> 16)  & 255;
1156            sbuf[6] = (true_study_size >>  8)  & 255;
1157            sbuf[7] = (true_study_size)  & 255;
1158    
1159            if (fwrite(sbuf, 1, 8, f) < 8 ||
1160                fwrite(re, 1, true_size, f) < true_size)
1161              {
1162              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1163              }
1164            else
1165              {
1166              fprintf(outfile, "Compiled regex written to %s\n", to_file);
1167              if (extra != NULL)
1168                {
1169                if (fwrite(extra->study_data, 1, true_study_size, f) <
1170                    true_study_size)
1171                  {
1172                  fprintf(outfile, "Write error on %s: %s\n", to_file,
1173                    strerror(errno));
1174                  }
1175                else fprintf(outfile, "Study data written to %s\n", to_file);
1176                }
1177              }
1178            fclose(f);
1179            }
1180    
1181          new_free(re);
1182          if (extra != NULL) new_free(extra);
1183          if (tables != NULL) new_free((void *)tables);
1184          continue;  /* With next regex */
1185          }
1186        }        /* End of non-POSIX compile */
1187    
1188    /* Read data lines and test them */    /* Read data lines and test them */
1189    
1190    for (;;)    for (;;)
1191      {      {
1192      unsigned char *q;      unsigned char *q;
1193        unsigned char *bptr = dbuffer;
1194        int *use_offsets = offsets;
1195        int use_size_offsets = size_offsets;
1196        int callout_data = 0;
1197        int callout_data_set = 0;
1198      int count, c;      int count, c;
1199      int offsets[30];      int copystrings = 0;
1200      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
1201        int getstrings = 0;
1202        int getlist = 0;
1203        int gmatched = 0;
1204        int start_offset = 0;
1205        int g_notempty = 0;
1206        int use_dfa = 0;
1207    
1208      options = 0;      options = 0;
1209    
1210      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
1211      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      first_callout = 1;
1212        callout_extra = 0;
1213        callout_count = 0;
1214        callout_fail_count = 999999;
1215        callout_fail_id = -1;
1216        show_malloc = 0;
1217    
1218        if (infile == stdin) printf("data> ");
1219        if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1220        {        {
1221        done = 1;        done = 1;
1222        goto CONTINUE;        goto CONTINUE;
1223        }        }
1224      if (infile != stdin) fprintf(outfile, (char *)buffer);      if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1225    
1226      len = (int)strlen((char *)buffer);      len = (int)strlen((char *)buffer);
1227      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
# Line 617  while (!done) Line 1236  while (!done)
1236        {        {
1237        int i = 0;        int i = 0;
1238        int n = 0;        int n = 0;
1239    
1240        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1241          {          {
1242          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 636  while (!done) Line 1256  while (!done)
1256          break;          break;
1257    
1258          case 'x':          case 'x':
1259    
1260            /* Handle \x{..} specially - new Perl thing for utf8 */
1261    
1262            if (*p == '{')
1263              {
1264              unsigned char *pt = p;
1265              c = 0;
1266              while (isxdigit(*(++pt)))
1267                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1268              if (*pt == '}')
1269                {
1270                unsigned char buff8[8];
1271                int ii, utn;
1272                utn = _pcre_ord2utf8(c, buff8);
1273                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1274                c = buff8[ii];   /* Last byte */
1275                p = pt + 1;
1276                break;
1277                }
1278              /* Not correct form; fall through */
1279              }
1280    
1281            /* Ordinary \x */
1282    
1283          c = 0;          c = 0;
1284          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1285            {            {
# Line 644  while (!done) Line 1288  while (!done)
1288            }            }
1289          break;          break;
1290    
1291          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1292          p--;          p--;
1293          continue;          continue;
1294    
1295            case '>':
1296            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1297            continue;
1298    
1299          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1300          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1301          continue;          continue;
# Line 656  while (!done) Line 1304  while (!done)
1304          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1305          continue;          continue;
1306    
1307          case 'E':          case 'C':
1308          options |= PCRE_DOLLAR_ENDONLY;          if (isdigit(*p))    /* Set copy string */
1309              {
1310              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1311              copystrings |= 1 << n;
1312              }
1313            else if (isalnum(*p))
1314              {
1315              uschar name[256];
1316              uschar *npp = name;
1317              while (isalnum(*p)) *npp++ = *p++;
1318              *npp = 0;
1319              n = pcre_get_stringnumber(re, (char *)name);
1320              if (n < 0)
1321                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1322              else copystrings |= 1 << n;
1323              }
1324            else if (*p == '+')
1325              {
1326              callout_extra = 1;
1327              p++;
1328              }
1329            else if (*p == '-')
1330              {
1331              pcre_callout = NULL;
1332              p++;
1333              }
1334            else if (*p == '!')
1335              {
1336              callout_fail_id = 0;
1337              p++;
1338              while(isdigit(*p))
1339                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1340              callout_fail_count = 0;
1341              if (*p == '!')
1342                {
1343                p++;
1344                while(isdigit(*p))
1345                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1346                }
1347              }
1348            else if (*p == '*')
1349              {
1350              int sign = 1;
1351              callout_data = 0;
1352              if (*(++p) == '-') { sign = -1; p++; }
1353              while(isdigit(*p))
1354                callout_data = callout_data * 10 + *p++ - '0';
1355              callout_data *= sign;
1356              callout_data_set = 1;
1357              }
1358            continue;
1359    
1360            case 'D':
1361            if (posix || do_posix)
1362              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1363            else
1364              use_dfa = 1;
1365          continue;          continue;
1366    
1367          case 'I':          case 'F':
1368          options |= PCRE_CASELESS;          options |= PCRE_DFA_SHORTEST;
1369            continue;
1370    
1371            case 'G':
1372            if (isdigit(*p))
1373              {
1374              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1375              getstrings |= 1 << n;
1376              }
1377            else if (isalnum(*p))
1378              {
1379              uschar name[256];
1380              uschar *npp = name;
1381              while (isalnum(*p)) *npp++ = *p++;
1382              *npp = 0;
1383              n = pcre_get_stringnumber(re, (char *)name);
1384              if (n < 0)
1385                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1386              else getstrings |= 1 << n;
1387              }
1388            continue;
1389    
1390            case 'L':
1391            getlist = 1;
1392          continue;          continue;
1393    
1394          case 'M':          case 'M':
1395          options |= PCRE_MULTILINE;          find_match_limit = 1;
1396          continue;          continue;
1397    
1398          case 'S':          case 'N':
1399          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1400          continue;          continue;
1401    
1402          case 'O':          case 'O':
1403          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1404          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1405              {
1406              size_offsets_max = n;
1407              free(offsets);
1408              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1409              if (offsets == NULL)
1410                {
1411                printf("** Failed to get %d bytes of memory for offsets vector\n",
1412                  size_offsets_max * sizeof(int));
1413                yield = 1;
1414                goto EXIT;
1415                }
1416              }
1417            use_size_offsets = n;
1418            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1419            continue;
1420    
1421            case 'P':
1422            options |= PCRE_PARTIAL;
1423            continue;
1424    
1425            case 'R':
1426            options |= PCRE_DFA_RESTART;
1427            continue;
1428    
1429            case 'S':
1430            show_malloc = 1;
1431          continue;          continue;
1432    
1433          case 'Z':          case 'Z':
1434          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1435          continue;          continue;
1436    
1437            case '?':
1438            options |= PCRE_NO_UTF8_CHECK;
1439            continue;
1440          }          }
1441        *q++ = c;        *q++ = c;
1442        }        }
1443      *q = 0;      *q = 0;
1444      len = q - dbuffer;      len = q - dbuffer;
1445    
1446        if ((all_use_dfa || use_dfa) && find_match_limit)
1447          {
1448          printf("**Match limit not relevant for DFA matching: ignored\n");
1449          find_match_limit = 0;
1450          }
1451    
1452      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1453      support timing. */      support timing or playing with the match limit or callout data. */
1454    
1455    #if !defined NOPOSIX
1456      if (posix || do_posix)      if (posix || do_posix)
1457        {        {
1458        int rc;        int rc;
1459        int eflags = 0;        int eflags = 0;
1460        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1461          if (use_size_offsets > 0)
1462            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1463        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1464        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1465    
1466        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1467    
1468        if (rc != 0)        if (rc != 0)
1469          {          {
1470          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1471          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1472          }          }
1473        else        else
1474          {          {
1475          size_t i;          size_t i;
1476          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1477            {            {
1478            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1479              {              {
1480              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
1481              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1482                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1483              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1484                if (i == 0 && do_showrest)
1485                  {
1486                  fprintf(outfile, " 0+ ");
1487                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1488                    outfile);
1489                  fprintf(outfile, "\n");
1490                  }
1491              }              }
1492            }            }
1493          }          }
1494          free(pmatch);
1495        }        }
1496    
1497      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1498    
1499      else      else
1500    #endif  /* !defined NOPOSIX */
1501    
1502        for (;; gmatched++)    /* Loop for /g or /G */
1503        {        {
1504        if (timeit)        if (timeit)
1505          {          {
1506          register int i;          register int i;
1507          clock_t time_taken;          clock_t time_taken;
1508          clock_t start_time = clock();          clock_t start_time = clock();
1509          for (i = 0; i < 4000; i++)  
1510            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,          if (all_use_dfa || use_dfa)
1511              size_offsets);            {
1512              int workspace[1000];
1513              for (i = 0; i < LOOPREPEAT; i++)
1514                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1515                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1516                  sizeof(workspace)/sizeof(int));
1517              }
1518            else
1519    
1520            for (i = 0; i < LOOPREPEAT; i++)
1521              count = pcre_exec(re, extra, (char *)bptr, len,
1522                start_offset, options | g_notempty, use_offsets, use_size_offsets);
1523    
1524          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1525          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1526            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1527                (double)CLOCKS_PER_SEC);
1528            }
1529    
1530          /* If find_match_limit is set, we want to do repeated matches with
1531          varying limits in order to find the minimum value. */
1532    
1533          if (find_match_limit)
1534            {
1535            int min = 0;
1536            int mid = 64;
1537            int max = -1;
1538    
1539            if (extra == NULL)
1540              {
1541              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1542              extra->flags = 0;
1543              }
1544            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1545    
1546            for (;;)
1547              {
1548              extra->match_limit = mid;
1549              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1550                options | g_notempty, use_offsets, use_size_offsets);
1551              if (count == PCRE_ERROR_MATCHLIMIT)
1552                {
1553                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1554                min = mid;
1555                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1556                }
1557              else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1558                                     count == PCRE_ERROR_PARTIAL)
1559                {
1560                if (mid == min + 1)
1561                  {
1562                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1563                  break;
1564                  }
1565                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1566                max = mid;
1567                mid = (min + mid)/2;
1568                }
1569              else break;    /* Some other error */
1570              }
1571    
1572            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1573          }          }
1574    
1575        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If callout_data is set, use the interface with additional data */
         size_offsets);  
1576    
1577        if (count == 0)        else if (callout_data_set)
1578          {          {
1579          fprintf(outfile, "Matched, but too many substrings\n");          if (extra == NULL)
1580          count = size_offsets/2;            {
1581              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1582              extra->flags = 0;
1583              }
1584            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1585            extra->callout_data = &callout_data;
1586            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1587              options | g_notempty, use_offsets, use_size_offsets);
1588            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1589          }          }
1590    
1591          /* The normal case is just to do the match once, with the default
1592          value of match_limit. */
1593    
1594          else if (all_use_dfa || use_dfa)
1595            {
1596            int workspace[1000];
1597            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1598              options | g_notempty, use_offsets, use_size_offsets, workspace,
1599              sizeof(workspace)/sizeof(int));
1600            if (count == 0)
1601              {
1602              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1603              count = use_size_offsets/2;
1604              }
1605            }
1606    
1607          else
1608            {
1609            count = pcre_exec(re, extra, (char *)bptr, len,
1610              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1611            if (count == 0)
1612              {
1613              fprintf(outfile, "Matched, but too many substrings\n");
1614              count = use_size_offsets/3;
1615              }
1616            }
1617    
1618          /* Matched */
1619    
1620        if (count >= 0)        if (count >= 0)
1621          {          {
1622          int i;          int i;
1623          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1624            {            {
1625            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1626              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1627            else            else
1628              {              {
1629              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1630              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1631                  use_offsets[i+1] - use_offsets[i], outfile);
1632              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1633                if (i == 0)
1634                  {
1635                  if (do_showrest)
1636                    {
1637                    fprintf(outfile, " 0+ ");
1638                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1639                      outfile);
1640                    fprintf(outfile, "\n");
1641                    }
1642                  }
1643                }
1644              }
1645    
1646            for (i = 0; i < 32; i++)
1647              {
1648              if ((copystrings & (1 << i)) != 0)
1649                {
1650                char copybuffer[16];
1651                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1652                  i, copybuffer, sizeof(copybuffer));
1653                if (rc < 0)
1654                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1655                else
1656                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1657                }
1658              }
1659    
1660            for (i = 0; i < 32; i++)
1661              {
1662              if ((getstrings & (1 << i)) != 0)
1663                {
1664                const char *substring;
1665                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1666                  i, &substring);
1667                if (rc < 0)
1668                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1669                else
1670                  {
1671                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1672                  /* free((void *)substring); */
1673                  pcre_free_substring(substring);
1674                  }
1675                }
1676              }
1677    
1678            if (getlist)
1679              {
1680              const char **stringlist;
1681              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1682                &stringlist);
1683              if (rc < 0)
1684                fprintf(outfile, "get substring list failed %d\n", rc);
1685              else
1686                {
1687                for (i = 0; i < count; i++)
1688                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1689                if (stringlist[i] != NULL)
1690                  fprintf(outfile, "string list not terminated by NULL\n");
1691                /* free((void *)stringlist); */
1692                pcre_free_substring_list(stringlist);
1693              }              }
1694            }            }
1695          }          }
1696    
1697          /* There was a partial match */
1698    
1699          else if (count == PCRE_ERROR_PARTIAL)
1700            {
1701            fprintf(outfile, "Partial match");
1702            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1703              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1704                bptr + use_offsets[0]);
1705            fprintf(outfile, "\n");
1706            break;  /* Out of the /g loop */
1707            }
1708    
1709          /* Failed to match. If this is a /g or /G loop and we previously set
1710          g_notempty after a null match, this is not necessarily the end.
1711          We want to advance the start offset, and continue. In the case of UTF-8
1712          matching, the advance must be one character, not one byte. Fudge the
1713          offset values to achieve this. We won't be at the end of the string -
1714          that was checked before setting g_notempty. */
1715    
1716        else        else
1717          {          {
1718          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1719              {
1720              int onechar = 1;
1721              use_offsets[0] = start_offset;
1722              if (use_utf8)
1723                {
1724                while (start_offset + onechar < len)
1725                  {
1726                  int tb = bptr[start_offset+onechar];
1727                  if (tb <= 127) break;
1728                  tb &= 0xc0;
1729                  if (tb != 0 && tb != 0xc0) onechar++;
1730                  }
1731                }
1732              use_offsets[1] = start_offset + onechar;
1733              }
1734            else
1735              {
1736              if (count == PCRE_ERROR_NOMATCH)
1737                {
1738                if (gmatched == 0) fprintf(outfile, "No match\n");
1739                }
1740            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
1741              break;  /* Out of the /g loop */
1742              }
1743          }          }
1744        }  
1745      }        /* If not /g or /G we are done */
1746    
1747          if (!do_g && !do_G) break;
1748    
1749          /* If we have matched an empty string, first check to see if we are at
1750          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1751          what Perl's /g option does. This turns out to be rather cunning. First
1752          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1753          same point. If this fails (picked up above) we advance to the next
1754          character. */
1755    
1756          g_notempty = 0;
1757          if (use_offsets[0] == use_offsets[1])
1758            {
1759            if (use_offsets[0] == len) break;
1760            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1761            }
1762    
1763          /* For /g, update the start offset, leaving the rest alone */
1764    
1765          if (do_g) start_offset = use_offsets[1];
1766    
1767          /* For /G, update the pointer and length */
1768    
1769          else
1770            {
1771            bptr += use_offsets[1];
1772            len -= use_offsets[1];
1773            }
1774          }  /* End of loop for /g and /G */
1775        }    /* End of loop for data lines */
1776    
1777    CONTINUE:    CONTINUE:
1778    
1779    #if !defined NOPOSIX
1780    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1781    if (re != NULL) free(re);  #endif
1782    if (extra != NULL) free(extra);  
1783      if (re != NULL) new_free(re);
1784      if (extra != NULL) new_free(extra);
1785      if (tables != NULL)
1786        {
1787        new_free((void *)tables);
1788        setlocale(LC_CTYPE, "C");
1789        }
1790    }    }
1791    
1792  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1793  return 0;  
1794    EXIT:
1795    
1796    if (infile != NULL && infile != stdin) fclose(infile);
1797    if (outfile != NULL && outfile != stdout) fclose(outfile);
1798    
1799    free(buffer);
1800    free(dbuffer);
1801    free(pbuffer);
1802    free(offsets);
1803    
1804    return yield;
1805  }  }
1806    
1807  /* End */  /* End of pcretest.c */

Legend:
Removed from v.13  
changed lines
  Added in v.77

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12