/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 27 by nigel, Sat Feb 24 21:38:49 2007 UTC revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
39  #include <ctype.h>  #include <ctype.h>
40  #include <stdio.h>  #include <stdio.h>
41  #include <string.h>  #include <string.h>
42  #include <stdlib.h>  #include <stdlib.h>
43  #include <time.h>  #include <time.h>
44  #include <locale.h>  #include <locale.h>
45    #include <errno.h>
46    
47    #define PCRE_SPY        /* For Win32 build, import data, not export */
48    
49    /* We include pcre_internal.h because we need the internal info for displaying
50    the results of pcre_study() and we also need to know about the internal
51    macros, structures, and other internal data values; pcretest has "inside
52    information" compared to a program that strictly follows the PCRE API. */
53    
54    #include "pcre_internal.h"
55    
56    /* We need access to the data tables that PCRE uses. So as not to have to keep
57    two copies, we include the source file here, changing the names of the external
58    symbols to prevent clashes. */
59    
60    #define _pcre_utf8_table1      utf8_table1
61    #define _pcre_utf8_table1_size utf8_table1_size
62    #define _pcre_utf8_table2      utf8_table2
63    #define _pcre_utf8_table3      utf8_table3
64    #define _pcre_utf8_table4      utf8_table4
65    #define _pcre_utt              utt
66    #define _pcre_utt_size         utt_size
67    #define _pcre_OP_lengths       OP_lengths
68    
69    #include "pcre_tables.c"
70    
71  /* Use the internal info for displaying the results of pcre_study(). */  /* We also need the pcre_printint() function for printing out compiled
72    patterns. This function is in a separate file so that it can be included in
73    pcre_compile.c when that module is compiled with debugging enabled. */
74    
75  #include "internal.h"  #include "pcre_printint.src"
76    
77    
78    /* It is possible to compile this test program without including support for
79    testing the POSIX interface, though this is not available via the standard
80    Makefile. */
81    
82    #if !defined NOPOSIX
83  #include "pcreposix.h"  #include "pcreposix.h"
84    #endif
85    
86    /* It is also possible, for the benefit of the version imported into Exim, to
87    build pcretest without support for UTF8 (define NOUTF8), without the interface
88    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
89    function (define NOINFOCHECK). */
90    
91    
92    /* Other parameters */
93    
94  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
95  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 22  Line 99 
99  #endif  #endif
100  #endif  #endif
101    
102  #define LOOPREPEAT 20000  #define LOOPREPEAT 500000
103    
104    #define BUFFER_SIZE 30000
105    #define PBUFFER_SIZE BUFFER_SIZE
106    #define DBUFFER_SIZE BUFFER_SIZE
107    
108    
109    /* Static variables */
110    
111  static FILE *outfile;  static FILE *outfile;
112  static int log_store = 0;  static int log_store = 0;
113    static int callout_count;
114    static int callout_extra;
115    static int callout_fail_count;
116    static int callout_fail_id;
117    static int first_callout;
118    static int show_malloc;
119    static int use_utf8;
120    static size_t gotten_store;
121    
122    static uschar *pbuffer = NULL;
123    
124    
 /* Debugging function to print the internal form of the regex. This is the same  
 code as contained in pcre.c under the DEBUG macro. */  
125    
126  static const char *OP_names[] = {  /*************************************************
127    "End", "\\A", "\\B", "\\b", "\\D", "\\d",  *          Read number from string               *
128    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",  *************************************************/
129    "Opt", "^", "$", "Any", "chars", "not",  
130    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
131    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  around with conditional compilation, just do the job by hand. It is only used
132    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  for unpicking the -o argument, so just keep it simple.
133    "*", "*?", "+", "+?", "?", "??", "{", "{",  
134    "class", "Ref",  Arguments:
135    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    str           string to be converted
136    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    endptr        where to put the end pointer
137    "Brazero", "Braminzero", "Bra"  
138  };  Returns:        the converted value, as an int
139    */
140    
141  static void print_internals(pcre *re, FILE *outfile)  static int
142  {  get_value(unsigned char *str, unsigned char **endptr)
143  unsigned char *code = ((real_pcre *)re)->code;  {
144    int result = 0;
145  fprintf(outfile, "------------------------------------------------------------------\n");  while(*str != 0 && isspace(*str)) str++;
146    while (isdigit(*str)) result = result * 10 + (int)(*str++ - '0');
147  for(;;)  *endptr = str;
148    {  return(result);
149    int c;  }
150    int charlength;  
151    
152    fprintf(outfile, "%3d ", (int)(code - ((real_pcre *)re)->code));  
153    
154    if (*code >= OP_BRA)  /*************************************************
155      {  *            Convert UTF-8 string to value       *
156      fprintf(outfile, "%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  *************************************************/
157      code += 2;  
158      }  /* This function takes one or more bytes that represent a UTF-8 character,
159    and returns the value of the character.
160    else switch(*code)  
161      {  Argument:
162      case OP_END:    buffer   a pointer to the byte vector
163      fprintf(outfile, "    %s\n", OP_names[*code]);    vptr     a pointer to an int to receive the value
164      fprintf(outfile, "------------------------------------------------------------------\n");  
165      return;  Returns:   >  0 => the number of bytes consumed
166               -6 to 0 => malformed UTF-8 character at offset = (-return)
167      case OP_OPT:  */
168      fprintf(outfile, " %.2x %s", code[1], OP_names[*code]);  
169      code++;  #if !defined NOUTF8
170      break;  
171    static int
172      case OP_COND:  utf82ord(unsigned char *buffer, int *vptr)
173      fprintf(outfile, "%3d Cond", (code[1] << 8) + code[2]);  {
174      code += 2;  int c = *buffer++;
175      break;  int d = c;
176    int i, j, s;
177      case OP_CREF:  
178      fprintf(outfile, " %.2d %s", code[1], OP_names[*code]);  for (i = -1; i < 6; i++)               /* i is number of additional bytes */
179      code++;    {
180      break;    if ((d & 0x80) == 0) break;
181      d <<= 1;
182      case OP_CHARS:    }
183      charlength = *(++code);  
184      fprintf(outfile, "%3d ", charlength);  if (i == -1) { *vptr = c; return 1; }  /* ascii character */
185      while (charlength-- > 0)  if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
186        if (isprint(c = *(++code))) fprintf(outfile, "%c", c);  
187          else fprintf(outfile, "\\x%02x", c);  /* i now has a value in the range 1-5 */
188      break;  
189    s = 6*i;
190      case OP_KETRMAX:  d = (c & utf8_table3[i]) << s;
191      case OP_KETRMIN:  
192      case OP_ALT:  for (j = 0; j < i; j++)
193      case OP_KET:    {
194      case OP_ASSERT:    c = *buffer++;
195      case OP_ASSERT_NOT:    if ((c & 0xc0) != 0x80) return -(j+1);
196      case OP_ASSERTBACK:    s -= 6;
197      case OP_ASSERTBACK_NOT:    d |= (c & 0x3f) << s;
198      case OP_ONCE:    }
199      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
200      code += 2;  /* Check that encoding was the correct unique one */
201      break;  
202    for (j = 0; j < utf8_table1_size; j++)
203      case OP_REVERSE:    if (d <= utf8_table1[j]) break;
204      fprintf(outfile, "%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  if (j != i) return -(i+1);
205      code += 2;  
206      break;  /* Valid value */
207    
208      case OP_STAR:  *vptr = d;
209      case OP_MINSTAR:  return i+1;
210      case OP_PLUS:  }
211      case OP_MINPLUS:  
212      case OP_QUERY:  #endif
213      case OP_MINQUERY:  
214      case OP_TYPESTAR:  
215      case OP_TYPEMINSTAR:  
216      case OP_TYPEPLUS:  /*************************************************
217      case OP_TYPEMINPLUS:  *       Convert character value to UTF-8         *
218      case OP_TYPEQUERY:  *************************************************/
219      case OP_TYPEMINQUERY:  
220      if (*code >= OP_TYPESTAR)  /* This function takes an integer value in the range 0 - 0x7fffffff
221        fprintf(outfile, "    %s", OP_names[code[1]]);  and encodes it as a UTF-8 character in 1 to 6 bytes.
222      else if (isprint(c = code[1])) fprintf(outfile, "    %c", c);  
223        else fprintf(outfile, "    \\x%02x", c);  Arguments:
224      fprintf(outfile, "%s", OP_names[*code++]);    cvalue     the character value
225      break;    buffer     pointer to buffer for result - at least 6 bytes long
226    
227      case OP_EXACT:  Returns:     number of characters placed in the buffer
228      case OP_UPTO:  */
229      case OP_MINUPTO:  
230      if (isprint(c = code[3])) fprintf(outfile, "    %c{", c);  static int
231        else fprintf(outfile, "    \\x%02x{", c);  ord2utf8(int cvalue, uschar *buffer)
232      if (*code != OP_EXACT) fprintf(outfile, ",");  {
233      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  register int i, j;
234      if (*code == OP_MINUPTO) fprintf(outfile, "?");  for (i = 0; i < utf8_table1_size; i++)
235      code += 3;    if (cvalue <= utf8_table1[i]) break;
236      break;  buffer += i;
237    for (j = i; j > 0; j--)
238      case OP_TYPEEXACT:   {
239      case OP_TYPEUPTO:   *buffer-- = 0x80 | (cvalue & 0x3f);
240      case OP_TYPEMINUPTO:   cvalue >>= 6;
241      fprintf(outfile, "    %s{", OP_names[code[3]]);   }
242      if (*code != OP_TYPEEXACT) fprintf(outfile, "0,");  *buffer = utf8_table2[i] | cvalue;
243      fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  return i + 1;
244      if (*code == OP_TYPEMINUPTO) fprintf(outfile, "?");  }
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) fprintf(outfile, "    [^%c]", c);  
       else fprintf(outfile, "    [^\\x%02x]", c);  
     fprintf(outfile, "%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) fprintf(outfile, "    [^%c]{", c);  
       else fprintf(outfile, "    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) fprintf(outfile, ",");  
     fprintf(outfile, "%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) fprintf(outfile, "?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     fprintf(outfile, "    \\%d", *(++code));  
     code++;  
     goto CLASS_REF_REPEAT;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
       code++;  
       fprintf(outfile, "    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') fprintf(outfile, "\\");  
           if (isprint(i)) fprintf(outfile, "%c", i); else fprintf(outfile, "\\x%02x", i);  
           if (--j > i)  
             {  
             fprintf(outfile, "-");  
             if (j == '-' || j == ']') fprintf(outfile, "\\");  
             if (isprint(j)) fprintf(outfile, "%c", j); else fprintf(outfile, "\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       fprintf(outfile, "]");  
       code += 32;  
   
       CLASS_REF_REPEAT:  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         fprintf(outfile, "%s", OP_names[*code]);  
         break;  
245    
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         min = (code[1] << 8) + code[2];  
         max = (code[3] << 8) + code[4];  
         if (max == 0) fprintf(outfile, "{%d,}", min);  
         else fprintf(outfile, "{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) fprintf(outfile, "?");  
         code += 4;  
         break;  
246    
247          default:  
248          code--;  /*************************************************
249    *             Print character string             *
250    *************************************************/
251    
252    /* Character string printing function. Must handle UTF-8 strings in utf8
253    mode. Yields number of characters printed. If handed a NULL file, just counts
254    chars without printing. */
255    
256    static int pchars(unsigned char *p, int length, FILE *f)
257    {
258    int c = 0;
259    int yield = 0;
260    
261    while (length-- > 0)
262      {
263    #if !defined NOUTF8
264      if (use_utf8)
265        {
266        int rc = utf82ord(p, &c);
267    
268        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
269          {
270          length -= rc - 1;
271          p += rc;
272          if (c < 256 && isprint(c))
273            {
274            if (f != NULL) fprintf(f, "%c", c);
275            yield++;
276          }          }
277          else
278            {
279            int n;
280            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
281            yield += n;
282            }
283          continue;
284        }        }
285      break;      }
286    #endif
287    
288      /* Anything else is just a one-node item */     /* Not UTF-8, or malformed UTF-8  */
289    
290      default:    if (isprint(c = *(p++)))
291      fprintf(outfile, "    %s", OP_names[*code]);      {
292      break;      if (f != NULL) fprintf(f, "%c", c);
293        yield++;
294        }
295      else
296        {
297        if (f != NULL) fprintf(f, "\\x%02x", c);
298        yield += 4;
299      }      }
   
   code++;  
   fprintf(outfile, "\n");  
300    }    }
301    
302    return yield;
303  }  }
304    
305    
306    
307  /* Character string printing function. */  /*************************************************
308    *              Callout function                  *
309    *************************************************/
310    
311  static void pchars(unsigned char *p, int length)  /* Called from PCRE as a result of the (?C) item. We print out where we are in
312    the match. Yield zero unless more callouts than the fail count, or the callout
313    data is not zero. */
314    
315    static int callout(pcre_callout_block *cb)
316  {  {
317  int c;  FILE *f = (first_callout | callout_extra)? outfile : NULL;
318  while (length-- > 0)  int i, pre_start, post_start, subject_length;
319    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);  
320      else fprintf(outfile, "\\x%02x", c);  if (callout_extra)
321      {
322      fprintf(f, "Callout %d: last capture = %d\n",
323        cb->callout_number, cb->capture_last);
324    
325      for (i = 0; i < cb->capture_top * 2; i += 2)
326        {
327        if (cb->offset_vector[i] < 0)
328          fprintf(f, "%2d: <unset>\n", i/2);
329        else
330          {
331          fprintf(f, "%2d: ", i/2);
332          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
333            cb->offset_vector[i+1] - cb->offset_vector[i], f);
334          fprintf(f, "\n");
335          }
336        }
337      }
338    
339    /* Re-print the subject in canonical form, the first time or if giving full
340    details. On subsequent calls in the same match, we use pchars just to find the
341    printed lengths of the substrings. */
342    
343    if (f != NULL) fprintf(f, "--->");
344    
345    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
346    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
347      cb->current_position - cb->start_match, f);
348    
349    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
350    
351    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
352      cb->subject_length - cb->current_position, f);
353    
354    if (f != NULL) fprintf(f, "\n");
355    
356    /* Always print appropriate indicators, with callout number if not already
357    shown. For automatic callouts, show the pattern offset. */
358    
359    if (cb->callout_number == 255)
360      {
361      fprintf(outfile, "%+3d ", cb->pattern_position);
362      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
363      }
364    else
365      {
366      if (callout_extra) fprintf(outfile, "    ");
367        else fprintf(outfile, "%3d ", cb->callout_number);
368      }
369    
370    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
371    fprintf(outfile, "^");
372    
373    if (post_start > 0)
374      {
375      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
376      fprintf(outfile, "^");
377      }
378    
379    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
380      fprintf(outfile, " ");
381    
382    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
383      pbuffer + cb->pattern_position);
384    
385    fprintf(outfile, "\n");
386    first_callout = 0;
387    
388    if (cb->callout_data != NULL)
389      {
390      int callout_data = *((int *)(cb->callout_data));
391      if (callout_data != 0)
392        {
393        fprintf(outfile, "Callout data = %d\n", callout_data);
394        return callout_data;
395        }
396      }
397    
398    return (cb->callout_number != callout_fail_id)? 0 :
399           (++callout_count >= callout_fail_count)? 1 : 0;
400  }  }
401    
402    
403    /*************************************************
404    *            Local malloc functions              *
405    *************************************************/
406    
407  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
408  compiled re. */  compiled re. */
409    
410  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
411  {  {
412  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
413  return malloc(size);  gotten_store = size;
414    if (show_malloc)
415      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
416    return block;
417  }  }
418    
419    static void new_free(void *block)
420    {
421    if (show_malloc)
422      fprintf(outfile, "free             %p\n", block);
423    free(block);
424    }
425    
426    
427    /* For recursion malloc/free, to test stacking calls */
428    
429    static void *stack_malloc(size_t size)
430    {
431    void *block = malloc(size);
432    if (show_malloc)
433      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
434    return block;
435    }
436    
437    static void stack_free(void *block)
438    {
439    if (show_malloc)
440      fprintf(outfile, "stack_free       %p\n", block);
441    free(block);
442    }
443    
444    
445    /*************************************************
446    *          Call pcre_fullinfo()                  *
447    *************************************************/
448    
449    /* Get one piece of information from the pcre_fullinfo() function */
450    
451    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
452    {
453    int rc;
454    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
455      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
456    }
457    
458    
459    
460    /*************************************************
461    *         Byte flipping function                 *
462    *************************************************/
463    
464    static long int
465    byteflip(long int value, int n)
466    {
467    if (n == 2) return ((value & 0x00ff) << 8) | ((value & 0xff00) >> 8);
468    return ((value & 0x000000ff) << 24) |
469           ((value & 0x0000ff00) <<  8) |
470           ((value & 0x00ff0000) >>  8) |
471           ((value & 0xff000000) >> 24);
472    }
473    
474    
475    
476    
477    /*************************************************
478    *                Main Program                    *
479    *************************************************/
480    
481  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
482  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 292  int study_options = 0; Line 490  int study_options = 0;
490  int op = 1;  int op = 1;
491  int timeit = 0;  int timeit = 0;
492  int showinfo = 0;  int showinfo = 0;
493    int showstore = 0;
494    int size_offsets = 45;
495    int size_offsets_max;
496    int *offsets = NULL;
497    #if !defined NOPOSIX
498  int posix = 0;  int posix = 0;
499    #endif
500  int debug = 0;  int debug = 0;
501  int done = 0;  int done = 0;
502  unsigned char buffer[30000];  int all_use_dfa = 0;
503  unsigned char dbuffer[1024];  int yield = 0;
504    
505    unsigned char *buffer;
506    unsigned char *dbuffer;
507    
508    /* Get buffers from malloc() so that Electric Fence will check their misuse
509    when I am debugging. */
510    
511  /* Static so that new_malloc can use it. */  buffer = (unsigned char *)malloc(BUFFER_SIZE);
512    dbuffer = (unsigned char *)malloc(DBUFFER_SIZE);
513    pbuffer = (unsigned char *)malloc(PBUFFER_SIZE);
514    
515    /* The outfile variable is static so that new_malloc can use it. The _setmode()
516    stuff is some magic that I don't understand, but which apparently does good
517    things in Windows. It's related to line terminations.  */
518    
519    #if defined(_WIN32) || defined(WIN32)
520    _setmode( _fileno( stdout ), 0x8000 );
521    #endif  /* defined(_WIN32) || defined(WIN32) */
522    
523  outfile = stdout;  outfile = stdout;
524    
# Line 306  outfile = stdout; Line 526  outfile = stdout;
526    
527  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
528    {    {
529    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
530    
531      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
532        showstore = 1;
533    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
534    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
535    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
536    #if !defined NODFA
537      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
538    #endif
539      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
540          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
541            *endptr == 0))
542        {
543        op++;
544        argc--;
545        }
546    #if !defined NOPOSIX
547    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
548    #endif
549      else if (strcmp(argv[op], "-C") == 0)
550        {
551        int rc;
552        printf("PCRE version %s\n", pcre_version());
553        printf("Compiled with\n");
554        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
555        printf("  %sUTF-8 support\n", rc? "" : "No ");
556        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
557        printf("  %sUnicode properties support\n", rc? "" : "No ");
558        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
559        printf("  Newline character is %s\n", (rc == '\r')? "CR" : "LF");
560        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
561        printf("  Internal link size = %d\n", rc);
562        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
563        printf("  POSIX malloc threshold = %d\n", rc);
564        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
565        printf("  Default match limit = %d\n", rc);
566        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
567        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
568        exit(0);
569        }
570    else    else
571      {      {
572      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
573      printf("Usage: pcretest [-d] [-i] [-p] [-s] [-t] [<input> [<output>]]\n");      printf("Usage:   pcretest [-d] [-i] [-o <n>] [-p] [-s] [-t] [<input> [<output>]]\n");
574      printf("  -d   debug: show compiled code; implies -i\n"      printf("  -C     show PCRE compile-time options and exit\n");
575             "  -i   show information about compiled pattern\n"      printf("  -d     debug: show compiled code; implies -i\n");
576             "  -p   use POSIX interface\n"  #if !defined NODFA
577             "  -s   output store information\n"      printf("  -dfa   force DFA matching for all subjects\n");
578             "  -t   time compilation and execution\n");  #endif
579      return 1;      printf("  -i     show information about compiled pattern\n"
580               "  -m     output memory used information\n"
581               "  -o <n> set size of offsets vector to <n>\n");
582    #if !defined NOPOSIX
583        printf("  -p     use POSIX interface\n");
584    #endif
585        printf("  -s     output store (memory) used information\n"
586               "  -t     time compilation and execution\n");
587        yield = 1;
588        goto EXIT;
589      }      }
590    op++;    op++;
591    argc--;    argc--;
592    }    }
593    
594    /* Get the store for the offsets vector, and remember what it was */
595    
596    size_offsets_max = size_offsets;
597    offsets = (int *)malloc(size_offsets_max * sizeof(int));
598    if (offsets == NULL)
599      {
600      printf("** Failed to get %d bytes of memory for offsets vector\n",
601        size_offsets_max * sizeof(int));
602      yield = 1;
603      goto EXIT;
604      }
605    
606  /* Sort out the input and output files */  /* Sort out the input and output files */
607    
608  if (argc > 1)  if (argc > 1)
609    {    {
610    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
611    if (infile == NULL)    if (infile == NULL)
612      {      {
613      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
614      return 1;      yield = 1;
615        goto EXIT;
616      }      }
617    }    }
618    
619  if (argc > 2)  if (argc > 2)
620    {    {
621    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
622    if (outfile == NULL)    if (outfile == NULL)
623      {      {
624      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
625      return 1;      yield = 1;
626        goto EXIT;
627      }      }
628    }    }
629    
630  /* Set alternative malloc function */  /* Set alternative malloc function */
631    
632  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
633    pcre_free = new_free;
634    pcre_stack_malloc = stack_malloc;
635    pcre_stack_free = stack_free;
636    
637  /* Heading line, then prompt for first regex if stdin */  /* Heading line, then prompt for first regex if stdin */
638    
# Line 362  while (!done) Line 644  while (!done)
644    {    {
645    pcre *re = NULL;    pcre *re = NULL;
646    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
647    
648    #if !defined NOPOSIX  /* There are still compilers that require no indent */
649    regex_t preg;    regex_t preg;
650      int do_posix = 0;
651    #endif
652    
653    const char *error;    const char *error;
654    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
655    unsigned const char *tables = NULL;    unsigned char *to_file = NULL;
656      const unsigned char *tables = NULL;
657      unsigned long int true_size, true_study_size = 0;
658      size_t size, regex_gotten_store;
659    int do_study = 0;    int do_study = 0;
660    int do_debug = debug;    int do_debug = debug;
661      int do_G = 0;
662      int do_g = 0;
663    int do_showinfo = showinfo;    int do_showinfo = showinfo;
664    int do_posix = 0;    int do_showrest = 0;
665      int do_flip = 0;
666    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
667    
668      use_utf8 = 0;
669    
670    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
671    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL) break;
672    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
673      fflush(outfile);
674    
675    p = buffer;    p = buffer;
676    while (isspace(*p)) p++;    while (isspace(*p)) p++;
677    if (*p == 0) continue;    if (*p == 0) continue;
678    
679    /* Get the delimiter and seek the end of the pattern; if it isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
680    complete, read more. */  
681      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
682        {
683        unsigned long int magic;
684        uschar sbuf[8];
685        FILE *f;
686    
687        p++;
688        pp = p + (int)strlen((char *)p);
689        while (isspace(pp[-1])) pp--;
690        *pp = 0;
691    
692        f = fopen((char *)p, "rb");
693        if (f == NULL)
694          {
695          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
696          continue;
697          }
698    
699        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
700    
701        true_size =
702          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
703        true_study_size =
704          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
705    
706        re = (real_pcre *)new_malloc(true_size);
707        regex_gotten_store = gotten_store;
708    
709        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
710    
711        magic = ((real_pcre *)re)->magic_number;
712        if (magic != MAGIC_NUMBER)
713          {
714          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
715            {
716            do_flip = 1;
717            }
718          else
719            {
720            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
721            fclose(f);
722            continue;
723            }
724          }
725    
726        fprintf(outfile, "Compiled regex%s loaded from %s\n",
727          do_flip? " (byte-inverted)" : "", p);
728    
729        /* Need to know if UTF-8 for printing data strings */
730    
731        new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
732        use_utf8 = (options & PCRE_UTF8) != 0;
733    
734        /* Now see if there is any following study data */
735    
736        if (true_study_size != 0)
737          {
738          pcre_study_data *psd;
739    
740          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
741          extra->flags = PCRE_EXTRA_STUDY_DATA;
742    
743          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
744          extra->study_data = psd;
745    
746          if (fread(psd, 1, true_study_size, f) != true_study_size)
747            {
748            FAIL_READ:
749            fprintf(outfile, "Failed to read data from %s\n", p);
750            if (extra != NULL) new_free(extra);
751            if (re != NULL) new_free(re);
752            fclose(f);
753            continue;
754            }
755          fprintf(outfile, "Study data loaded from %s\n", p);
756          do_study = 1;     /* To get the data output if requested */
757          }
758        else fprintf(outfile, "No study data\n");
759    
760        fclose(f);
761        goto SHOW_INFO;
762        }
763    
764      /* In-line pattern (the usual case). Get the delimiter and seek the end of
765      the pattern; if it isn't complete, read more. */
766    
767    delimiter = *p++;    delimiter = *p++;
768    
769    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
770      {      {
771      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
772      goto SKIP_DATA;      goto SKIP_DATA;
773      }      }
774    
# Line 395  while (!done) Line 776  while (!done)
776    
777    for(;;)    for(;;)
778      {      {
779      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
780          {
781          if (*pp == '\\' && pp[1] != 0) pp++;
782            else if (*pp == delimiter) break;
783          pp++;
784          }
785      if (*pp != 0) break;      if (*pp != 0) break;
786    
787      len = sizeof(buffer) - (pp - buffer);      len = BUFFER_SIZE - (pp - buffer);
788      if (len < 256)      if (len < 256)
789        {        {
790        fprintf(outfile, "** Expression too long - missing delimiter?\n");        fprintf(outfile, "** Expression too long - missing delimiter?\n");
# Line 415  while (!done) Line 801  while (!done)
801      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
802      }      }
803    
804    /* Terminate the pattern at the delimiter */    /* If the first character after the delimiter is backslash, make
805      the pattern end with backslash. This is purely to provide a way
806      of testing for the error message when a pattern ends with backslash. */
807    
808      if (pp[1] == '\\') *pp++ = '\\';
809    
810      /* Terminate the pattern at the delimiter, and save a copy of the pattern
811      for callouts. */
812    
813    *pp++ = 0;    *pp++ = 0;
814      strcpy((char *)pbuffer, (char *)p);
815    
816    /* Look for options after final delimiter */    /* Look for options after final delimiter */
817    
818    options = 0;    options = 0;
819    study_options = 0;    study_options = 0;
820      log_store = showstore;  /* default from command line */
821    
822    while (*pp != 0)    while (*pp != 0)
823      {      {
824      switch (*pp++)      switch (*pp++)
825        {        {
826          case 'f': options |= PCRE_FIRSTLINE; break;
827          case 'g': do_g = 1; break;
828        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
829        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
830        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
831        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
832    
833          case '+': do_showrest = 1; break;
834        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
835          case 'C': options |= PCRE_AUTO_CALLOUT; break;
836        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
837        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
838          case 'F': do_flip = 1; break;
839          case 'G': do_G = 1; break;
840        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
841          case 'M': log_store = 1; break;
842          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
843    
844    #if !defined NOPOSIX
845        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
846    #endif
847    
848        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
849        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
850        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
851          case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
852          case '?': options |= PCRE_NO_UTF8_CHECK; break;
853    
854        case 'L':        case 'L':
855        ppp = pp;        ppp = pp;
856        while (*ppp != '\n' && *ppp != ' ') ppp++;        /* The '\r' test here is so that it works on Windows */
857          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
858        *ppp = 0;        *ppp = 0;
859        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)        if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
860          {          {
# Line 454  while (!done) Line 865  while (!done)
865        pp = ppp;        pp = ppp;
866        break;        break;
867    
868        case '\n': case ' ': break;        case '>':
869          to_file = pp;
870          while (*pp != 0) pp++;
871          while (isspace(pp[-1])) pp--;
872          *pp = 0;
873          break;
874    
875          case '\r':                      /* So that it works in Windows */
876          case '\n':
877          case ' ':
878          break;
879    
880        default:        default:
881        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
882        goto SKIP_DATA;        goto SKIP_DATA;
# Line 465  while (!done) Line 887  while (!done)
887    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
888    local character tables. */    local character tables. */
889    
890    #if !defined NOPOSIX
891    if (posix || do_posix)    if (posix || do_posix)
892      {      {
893      int rc;      int rc;
894      int cflags = 0;      int cflags = 0;
895    
896      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
897      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
898        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
899      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
900    
901      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 478  while (!done) Line 903  while (!done)
903    
904      if (rc != 0)      if (rc != 0)
905        {        {
906        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
907        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
908        goto SKIP_DATA;        goto SKIP_DATA;
909        }        }
# Line 487  while (!done) Line 912  while (!done)
912    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
913    
914    else    else
915    #endif  /* !defined NOPOSIX */
916    
917      {      {
918      if (timeit)      if (timeit)
919        {        {
# Line 500  while (!done) Line 927  while (!done)
927          }          }
928        time_taken = clock() - start_time;        time_taken = clock() - start_time;
929        fprintf(outfile, "Compile time %.3f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
930          ((double)time_taken * 1000.0) /          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
931          ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            (double)CLOCKS_PER_SEC);
932        }        }
933    
934      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
# Line 517  while (!done) Line 944  while (!done)
944          {          {
945          for (;;)          for (;;)
946            {            {
947            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
948              {              {
949              done = 1;              done = 1;
950              goto CONTINUE;              goto CONTINUE;
# Line 531  while (!done) Line 958  while (!done)
958        goto CONTINUE;        goto CONTINUE;
959        }        }
960    
961      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
962        info-returning functions. The old one has a limited interface and
963        returns only limited data. Check that it agrees with the newer one. */
964    
965        if (log_store)
966          fprintf(outfile, "Memory allocation (code space): %d\n",
967            (int)(gotten_store -
968                  sizeof(real_pcre) -
969                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
970    
971      if (do_showinfo)      /* Extract the size for possible writing before possibly flipping it,
972        {      and remember the store that was got. */
       int first_char, count;  
973    
974        if (do_debug) print_internals(re, outfile);      true_size = ((real_pcre *)re)->size;
975        regex_gotten_store = gotten_store;
       count = pcre_info(re, &options, &first_char);  
       if (count < 0) fprintf(outfile,  
         "Error %d while reading info\n", count);  
       else  
         {  
         fprintf(outfile, "Identifying subpattern count = %d\n", count);  
         if (options == 0) fprintf(outfile, "No options\n");  
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "",  
             ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
         else  
           {  
           if (isprint(first_char))  
             fprintf(outfile, "First char = \'%c\'\n", first_char);  
           else  
             fprintf(outfile, "First char = %d\n", first_char);  
           }  
         }  
       }  
976    
977      /* If /S was present, study the regexp to generate additional info to      /* If /S was present, study the regexp to generate additional info to
978      help with the matching. */      help with the matching. */
# Line 588  while (!done) Line 989  while (!done)
989          time_taken = clock() - start_time;          time_taken = clock() - start_time;
990          if (extra != NULL) free(extra);          if (extra != NULL) free(extra);
991          fprintf(outfile, "  Study time %.3f milliseconds\n",          fprintf(outfile, "  Study time %.3f milliseconds\n",
992            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
993            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
994          }          }
   
995        extra = pcre_study(re, study_options, &error);        extra = pcre_study(re, study_options, &error);
996        if (error != NULL)        if (error != NULL)
997          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
998        else if (extra == NULL)        else if (extra != NULL)
999          fprintf(outfile, "Study returned NULL\n");          true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1000          }
1001    
1002        /* If the 'F' option was present, we flip the bytes of all the integer
1003        fields in the regex data block and the study block. This is to make it
1004        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1005        compiled on a different architecture. */
1006    
1007        if (do_flip)
1008          {
1009          real_pcre *rre = (real_pcre *)re;
1010          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1011          rre->size = byteflip(rre->size, sizeof(rre->size));
1012          rre->options = byteflip(rre->options, sizeof(rre->options));
1013          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1014          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1015          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1016          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1017          rre->name_table_offset = byteflip(rre->name_table_offset,
1018            sizeof(rre->name_table_offset));
1019          rre->name_entry_size = byteflip(rre->name_entry_size,
1020            sizeof(rre->name_entry_size));
1021          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1022    
1023          if (extra != NULL)
1024            {
1025            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1026            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1027            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1028            }
1029          }
1030    
1031        /* This looks at internal information. A bit kludgy to do it this      /* Extract information from the compiled data if required */
       way, but it is useful for testing. */  
1032    
1033        else if (do_showinfo)      SHOW_INFO:
1034    
1035        if (do_showinfo)
1036          {
1037          unsigned long int get_options, all_options;
1038    #if !defined NOINFOCHECK
1039          int old_first_char, old_options, old_count;
1040    #endif
1041          int count, backrefmax, first_char, need_char;
1042          int nameentrysize, namecount;
1043          const uschar *nametable;
1044    
1045          if (do_debug)
1046            {
1047            fprintf(outfile, "------------------------------------------------------------------\n");
1048            pcre_printint(re, outfile);
1049            }
1050    
1051          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1052          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1053          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1054          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1055          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1056          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1057          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1058          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1059          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1060    
1061    #if !defined NOINFOCHECK
1062          old_count = pcre_info(re, &old_options, &old_first_char);
1063          if (count < 0) fprintf(outfile,
1064            "Error %d from pcre_info()\n", count);
1065          else
1066            {
1067            if (old_count != count) fprintf(outfile,
1068              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1069                old_count);
1070    
1071            if (old_first_char != first_char) fprintf(outfile,
1072              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1073                first_char, old_first_char);
1074    
1075            if (old_options != (int)get_options) fprintf(outfile,
1076              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1077                get_options, old_options);
1078            }
1079    #endif
1080    
1081          if (size != regex_gotten_store) fprintf(outfile,
1082            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1083            (int)size, (int)regex_gotten_store);
1084    
1085          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1086          if (backrefmax > 0)
1087            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1088    
1089          if (namecount > 0)
1090            {
1091            fprintf(outfile, "Named capturing subpatterns:\n");
1092            while (namecount-- > 0)
1093              {
1094              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1095                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1096                GET2(nametable, 0));
1097              nametable += nameentrysize;
1098              }
1099            }
1100    
1101          /* The NOPARTIAL bit is a private bit in the options, so we have
1102          to fish it out via our back door */
1103    
1104          all_options = ((real_pcre *)re)->options;
1105          if (do_flip)
1106            {
1107            all_options = byteflip(all_options, sizeof(all_options));
1108            }
1109    
1110          if ((all_options & PCRE_NOPARTIAL) != 0)
1111            fprintf(outfile, "Partial matching not supported\n");
1112    
1113          if (get_options == 0) fprintf(outfile, "No options\n");
1114            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s\n",
1115              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1116              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1117              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1118              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1119              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1120              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1121              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1122              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1123              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1124              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1125              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "");
1126    
1127          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
1128            fprintf(outfile, "Case state changes\n");
1129    
1130          if (first_char == -1)
1131            {
1132            fprintf(outfile, "First char at start or follows \\n\n");
1133            }
1134          else if (first_char < 0)
1135            {
1136            fprintf(outfile, "No first char\n");
1137            }
1138          else
1139            {
1140            int ch = first_char & 255;
1141            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1142              "" : " (caseless)";
1143            if (isprint(ch))
1144              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1145            else
1146              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1147            }
1148    
1149          if (need_char < 0)
1150            {
1151            fprintf(outfile, "No need char\n");
1152            }
1153          else
1154            {
1155            int ch = need_char & 255;
1156            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1157              "" : " (caseless)";
1158            if (isprint(ch))
1159              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1160            else
1161              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1162            }
1163    
1164          /* Don't output study size; at present it is in any case a fixed
1165          value, but it varies, depending on the computer architecture, and
1166          so messes up the test suite. (And with the /F option, it might be
1167          flipped.) */
1168    
1169          if (do_study)
1170          {          {
1171          real_pcre_extra *xx = (real_pcre_extra *)extra;          if (extra == NULL)
1172          if ((xx->options & PCRE_STUDY_MAPPED) == 0)            fprintf(outfile, "Study returned NULL\n");
           fprintf(outfile, "No starting character set\n");  
1173          else          else
1174            {            {
1175            int i;            uschar *start_bits = NULL;
1176            int c = 24;            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1177            fprintf(outfile, "Starting character set: ");  
1178            for (i = 0; i < 256; i++)            if (start_bits == NULL)
1179                fprintf(outfile, "No starting byte set\n");
1180              else
1181              {              {
1182              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              int i;
1183                int c = 24;
1184                fprintf(outfile, "Starting byte set: ");
1185                for (i = 0; i < 256; i++)
1186                {                {
1187                if (c > 75)                if ((start_bits[i/8] & (1<<(i&7))) != 0)
1188                  {                  {
1189                  fprintf(outfile, "\n  ");                  if (c > 75)
1190                  c = 2;                    {
1191                  }                    fprintf(outfile, "\n  ");
1192                if (isprint(i) && i != ' ')                    c = 2;
1193                  {                    }
1194                  fprintf(outfile, "%c ", i);                  if (isprint(i) && i != ' ')
1195                  c += 2;                    {
1196                  }                    fprintf(outfile, "%c ", i);
1197                else                    c += 2;
1198                  {                    }
1199                  fprintf(outfile, "\\x%02x ", i);                  else
1200                  c += 5;                    {
1201                      fprintf(outfile, "\\x%02x ", i);
1202                      c += 5;
1203                      }
1204                  }                  }
1205                }                }
1206                fprintf(outfile, "\n");
1207              }              }
           fprintf(outfile, "\n");  
1208            }            }
1209          }          }
1210        }        }
1211      }  
1212        /* If the '>' option was present, we write out the regex to a file, and
1213        that is all. The first 8 bytes of the file are the regex length and then
1214        the study length, in big-endian order. */
1215    
1216        if (to_file != NULL)
1217          {
1218          FILE *f = fopen((char *)to_file, "wb");
1219          if (f == NULL)
1220            {
1221            fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1222            }
1223          else
1224            {
1225            uschar sbuf[8];
1226            sbuf[0] = (true_size >> 24)  & 255;
1227            sbuf[1] = (true_size >> 16)  & 255;
1228            sbuf[2] = (true_size >>  8)  & 255;
1229            sbuf[3] = (true_size)  & 255;
1230    
1231            sbuf[4] = (true_study_size >> 24)  & 255;
1232            sbuf[5] = (true_study_size >> 16)  & 255;
1233            sbuf[6] = (true_study_size >>  8)  & 255;
1234            sbuf[7] = (true_study_size)  & 255;
1235    
1236            if (fwrite(sbuf, 1, 8, f) < 8 ||
1237                fwrite(re, 1, true_size, f) < true_size)
1238              {
1239              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1240              }
1241            else
1242              {
1243              fprintf(outfile, "Compiled regex written to %s\n", to_file);
1244              if (extra != NULL)
1245                {
1246                if (fwrite(extra->study_data, 1, true_study_size, f) <
1247                    true_study_size)
1248                  {
1249                  fprintf(outfile, "Write error on %s: %s\n", to_file,
1250                    strerror(errno));
1251                  }
1252                else fprintf(outfile, "Study data written to %s\n", to_file);
1253                }
1254              }
1255            fclose(f);
1256            }
1257    
1258          new_free(re);
1259          if (extra != NULL) new_free(extra);
1260          if (tables != NULL) new_free((void *)tables);
1261          continue;  /* With next regex */
1262          }
1263        }        /* End of non-POSIX compile */
1264    
1265    /* Read data lines and test them */    /* Read data lines and test them */
1266    
1267    for (;;)    for (;;)
1268      {      {
1269      unsigned char *q;      unsigned char *q;
1270        unsigned char *bptr = dbuffer;
1271        int *use_offsets = offsets;
1272        int use_size_offsets = size_offsets;
1273        int callout_data = 0;
1274        int callout_data_set = 0;
1275      int count, c;      int count, c;
1276      int offsets[45];      int copystrings = 0;
1277      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
1278        int getstrings = 0;
1279        int getlist = 0;
1280        int gmatched = 0;
1281        int start_offset = 0;
1282        int g_notempty = 0;
1283        int use_dfa = 0;
1284    
1285      options = 0;      options = 0;
1286    
1287      if (infile == stdin) printf("  data> ");      pcre_callout = callout;
1288      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      first_callout = 1;
1289        callout_extra = 0;
1290        callout_count = 0;
1291        callout_fail_count = 999999;
1292        callout_fail_id = -1;
1293        show_malloc = 0;
1294    
1295        if (infile == stdin) printf("data> ");
1296        if (fgets((char *)buffer, BUFFER_SIZE, infile) == NULL)
1297        {        {
1298        done = 1;        done = 1;
1299        goto CONTINUE;        goto CONTINUE;
# Line 670  while (!done) Line 1313  while (!done)
1313        {        {
1314        int i = 0;        int i = 0;
1315        int n = 0;        int n = 0;
1316    
1317        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1318          {          {
1319          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 689  while (!done) Line 1333  while (!done)
1333          break;          break;
1334    
1335          case 'x':          case 'x':
1336    
1337            /* Handle \x{..} specially - new Perl thing for utf8 */
1338    
1339    #if !defined NOUTF8
1340            if (*p == '{')
1341              {
1342              unsigned char *pt = p;
1343              c = 0;
1344              while (isxdigit(*(++pt)))
1345                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1346              if (*pt == '}')
1347                {
1348                unsigned char buff8[8];
1349                int ii, utn;
1350                utn = ord2utf8(c, buff8);
1351                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1352                c = buff8[ii];   /* Last byte */
1353                p = pt + 1;
1354                break;
1355                }
1356              /* Not correct form; fall through */
1357              }
1358    #endif
1359    
1360            /* Ordinary \x */
1361    
1362          c = 0;          c = 0;
1363          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1364            {            {
# Line 697  while (!done) Line 1367  while (!done)
1367            }            }
1368          break;          break;
1369    
1370          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1371          p--;          p--;
1372          continue;          continue;
1373    
1374            case '>':
1375            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1376            continue;
1377    
1378          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1379          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1380          continue;          continue;
# Line 709  while (!done) Line 1383  while (!done)
1383          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1384          continue;          continue;
1385    
1386            case 'C':
1387            if (isdigit(*p))    /* Set copy string */
1388              {
1389              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1390              copystrings |= 1 << n;
1391              }
1392            else if (isalnum(*p))
1393              {
1394              uschar name[256];
1395              uschar *npp = name;
1396              while (isalnum(*p)) *npp++ = *p++;
1397              *npp = 0;
1398              n = pcre_get_stringnumber(re, (char *)name);
1399              if (n < 0)
1400                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1401              else copystrings |= 1 << n;
1402              }
1403            else if (*p == '+')
1404              {
1405              callout_extra = 1;
1406              p++;
1407              }
1408            else if (*p == '-')
1409              {
1410              pcre_callout = NULL;
1411              p++;
1412              }
1413            else if (*p == '!')
1414              {
1415              callout_fail_id = 0;
1416              p++;
1417              while(isdigit(*p))
1418                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1419              callout_fail_count = 0;
1420              if (*p == '!')
1421                {
1422                p++;
1423                while(isdigit(*p))
1424                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1425                }
1426              }
1427            else if (*p == '*')
1428              {
1429              int sign = 1;
1430              callout_data = 0;
1431              if (*(++p) == '-') { sign = -1; p++; }
1432              while(isdigit(*p))
1433                callout_data = callout_data * 10 + *p++ - '0';
1434              callout_data *= sign;
1435              callout_data_set = 1;
1436              }
1437            continue;
1438    
1439    #if !defined NODFA
1440            case 'D':
1441    #if !defined NOPOSIX
1442            if (posix || do_posix)
1443              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1444            else
1445    #endif
1446              use_dfa = 1;
1447            continue;
1448    
1449            case 'F':
1450            options |= PCRE_DFA_SHORTEST;
1451            continue;
1452    #endif
1453    
1454            case 'G':
1455            if (isdigit(*p))
1456              {
1457              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1458              getstrings |= 1 << n;
1459              }
1460            else if (isalnum(*p))
1461              {
1462              uschar name[256];
1463              uschar *npp = name;
1464              while (isalnum(*p)) *npp++ = *p++;
1465              *npp = 0;
1466              n = pcre_get_stringnumber(re, (char *)name);
1467              if (n < 0)
1468                fprintf(outfile, "no parentheses with name \"%s\"\n", name);
1469              else getstrings |= 1 << n;
1470              }
1471            continue;
1472    
1473            case 'L':
1474            getlist = 1;
1475            continue;
1476    
1477            case 'M':
1478            find_match_limit = 1;
1479            continue;
1480    
1481            case 'N':
1482            options |= PCRE_NOTEMPTY;
1483            continue;
1484    
1485          case 'O':          case 'O':
1486          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1487          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n > size_offsets_max)
1488              {
1489              size_offsets_max = n;
1490              free(offsets);
1491              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1492              if (offsets == NULL)
1493                {
1494                printf("** Failed to get %d bytes of memory for offsets vector\n",
1495                  size_offsets_max * sizeof(int));
1496                yield = 1;
1497                goto EXIT;
1498                }
1499              }
1500            use_size_offsets = n;
1501            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1502            continue;
1503    
1504            case 'P':
1505            options |= PCRE_PARTIAL;
1506            continue;
1507    
1508    #if !defined NODFA
1509            case 'R':
1510            options |= PCRE_DFA_RESTART;
1511            continue;
1512    #endif
1513    
1514            case 'S':
1515            show_malloc = 1;
1516          continue;          continue;
1517    
1518          case 'Z':          case 'Z':
1519          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1520          continue;          continue;
1521    
1522            case '?':
1523            options |= PCRE_NO_UTF8_CHECK;
1524            continue;
1525          }          }
1526        *q++ = c;        *q++ = c;
1527        }        }
1528      *q = 0;      *q = 0;
1529      len = q - dbuffer;      len = q - dbuffer;
1530    
1531        if ((all_use_dfa || use_dfa) && find_match_limit)
1532          {
1533          printf("**Match limit not relevant for DFA matching: ignored\n");
1534          find_match_limit = 0;
1535          }
1536    
1537      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1538      support timing. */      support timing or playing with the match limit or callout data. */
1539    
1540    #if !defined NOPOSIX
1541      if (posix || do_posix)      if (posix || do_posix)
1542        {        {
1543        int rc;        int rc;
1544        int eflags = 0;        int eflags = 0;
1545        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1546          if (use_size_offsets > 0)
1547            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1548        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1549        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1550    
1551        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1552    
1553        if (rc != 0)        if (rc != 0)
1554          {          {
1555          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, BUFFER_SIZE);
1556          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1557          }          }
1558        else        else
1559          {          {
1560          size_t i;          size_t i;
1561          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1562            {            {
1563            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1564              {              {
1565              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1566              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1567                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1568              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1569                if (i == 0 && do_showrest)
1570                  {
1571                  fprintf(outfile, " 0+ ");
1572                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1573                    outfile);
1574                  fprintf(outfile, "\n");
1575                  }
1576              }              }
1577            }            }
1578          }          }
1579          free(pmatch);
1580        }        }
1581    
1582      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1583    
1584      else      else
1585    #endif  /* !defined NOPOSIX */
1586    
1587        for (;; gmatched++)    /* Loop for /g or /G */
1588        {        {
1589        if (timeit)        if (timeit)
1590          {          {
1591          register int i;          register int i;
1592          clock_t time_taken;          clock_t time_taken;
1593          clock_t start_time = clock();          clock_t start_time = clock();
1594    
1595    #if !defined NODFA
1596            if (all_use_dfa || use_dfa)
1597              {
1598              int workspace[1000];
1599              for (i = 0; i < LOOPREPEAT; i++)
1600                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1601                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1602                  sizeof(workspace)/sizeof(int));
1603              }
1604            else
1605    #endif
1606    
1607          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1608            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1609              size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1610    
1611          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1612          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1613            ((double)time_taken * 1000.0)/            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1614            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));              (double)CLOCKS_PER_SEC);
1615            }
1616    
1617          /* If find_match_limit is set, we want to do repeated matches with
1618          varying limits in order to find the minimum value. */
1619    
1620          if (find_match_limit)
1621            {
1622            int min = 0;
1623            int mid = 64;
1624            int max = -1;
1625    
1626            if (extra == NULL)
1627              {
1628              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1629              extra->flags = 0;
1630              }
1631            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1632    
1633            for (;;)
1634              {
1635              extra->match_limit = mid;
1636              count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1637                options | g_notempty, use_offsets, use_size_offsets);
1638              if (count == PCRE_ERROR_MATCHLIMIT)
1639                {
1640                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1641                min = mid;
1642                mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
1643                }
1644              else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
1645                                     count == PCRE_ERROR_PARTIAL)
1646                {
1647                if (mid == min + 1)
1648                  {
1649                  fprintf(outfile, "Minimum match limit = %d\n", mid);
1650                  break;
1651                  }
1652                /* fprintf(outfile, "Testing match limit = %d\n", mid); */
1653                max = mid;
1654                mid = (min + mid)/2;
1655                }
1656              else break;    /* Some other error */
1657              }
1658    
1659            extra->flags &= ~PCRE_EXTRA_MATCH_LIMIT;
1660          }          }
1661    
1662        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* If callout_data is set, use the interface with additional data */
         size_offsets);  
1663    
1664        if (count == 0)        else if (callout_data_set)
1665          {          {
1666          fprintf(outfile, "Matched, but too many substrings\n");          if (extra == NULL)
1667          count = size_offsets/3;            {
1668              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1669              extra->flags = 0;
1670              }
1671            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1672            extra->callout_data = &callout_data;
1673            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1674              options | g_notempty, use_offsets, use_size_offsets);
1675            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1676          }          }
1677    
1678          /* The normal case is just to do the match once, with the default
1679          value of match_limit. */
1680    
1681    #if !defined NODFA
1682          else if (all_use_dfa || use_dfa)
1683            {
1684            int workspace[1000];
1685            count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1686              options | g_notempty, use_offsets, use_size_offsets, workspace,
1687              sizeof(workspace)/sizeof(int));
1688            if (count == 0)
1689              {
1690              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1691              count = use_size_offsets/2;
1692              }
1693            }
1694    #endif
1695    
1696          else
1697            {
1698            count = pcre_exec(re, extra, (char *)bptr, len,
1699              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1700            if (count == 0)
1701              {
1702              fprintf(outfile, "Matched, but too many substrings\n");
1703              count = use_size_offsets/3;
1704              }
1705            }
1706    
1707          /* Matched */
1708    
1709        if (count >= 0)        if (count >= 0)
1710          {          {
1711          int i;          int i;
1712          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1713            {            {
1714            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1715              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1716            else            else
1717              {              {
1718              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1719              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1720                  use_offsets[i+1] - use_offsets[i], outfile);
1721              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1722                if (i == 0)
1723                  {
1724                  if (do_showrest)
1725                    {
1726                    fprintf(outfile, " 0+ ");
1727                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1728                      outfile);
1729                    fprintf(outfile, "\n");
1730                    }
1731                  }
1732                }
1733              }
1734    
1735            for (i = 0; i < 32; i++)
1736              {
1737              if ((copystrings & (1 << i)) != 0)
1738                {
1739                char copybuffer[16];
1740                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1741                  i, copybuffer, sizeof(copybuffer));
1742                if (rc < 0)
1743                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1744                else
1745                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1746                }
1747              }
1748    
1749            for (i = 0; i < 32; i++)
1750              {
1751              if ((getstrings & (1 << i)) != 0)
1752                {
1753                const char *substring;
1754                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
1755                  i, &substring);
1756                if (rc < 0)
1757                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
1758                else
1759                  {
1760                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1761                  /* free((void *)substring); */
1762                  pcre_free_substring(substring);
1763                  }
1764                }
1765              }
1766    
1767            if (getlist)
1768              {
1769              const char **stringlist;
1770              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
1771                &stringlist);
1772              if (rc < 0)
1773                fprintf(outfile, "get substring list failed %d\n", rc);
1774              else
1775                {
1776                for (i = 0; i < count; i++)
1777                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1778                if (stringlist[i] != NULL)
1779                  fprintf(outfile, "string list not terminated by NULL\n");
1780                /* free((void *)stringlist); */
1781                pcre_free_substring_list(stringlist);
1782              }              }
1783            }            }
1784          }          }
1785    
1786          /* There was a partial match */
1787    
1788          else if (count == PCRE_ERROR_PARTIAL)
1789            {
1790            fprintf(outfile, "Partial match");
1791    #if !defined NODFA
1792            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
1793              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
1794                bptr + use_offsets[0]);
1795    #endif
1796            fprintf(outfile, "\n");
1797            break;  /* Out of the /g loop */
1798            }
1799    
1800          /* Failed to match. If this is a /g or /G loop and we previously set
1801          g_notempty after a null match, this is not necessarily the end.
1802          We want to advance the start offset, and continue. In the case of UTF-8
1803          matching, the advance must be one character, not one byte. Fudge the
1804          offset values to achieve this. We won't be at the end of the string -
1805          that was checked before setting g_notempty. */
1806    
1807        else        else
1808          {          {
1809          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
1810              {
1811              int onechar = 1;
1812              use_offsets[0] = start_offset;
1813              if (use_utf8)
1814                {
1815                while (start_offset + onechar < len)
1816                  {
1817                  int tb = bptr[start_offset+onechar];
1818                  if (tb <= 127) break;
1819                  tb &= 0xc0;
1820                  if (tb != 0 && tb != 0xc0) onechar++;
1821                  }
1822                }
1823              use_offsets[1] = start_offset + onechar;
1824              }
1825            else
1826              {
1827              if (count == PCRE_ERROR_NOMATCH)
1828                {
1829                if (gmatched == 0) fprintf(outfile, "No match\n");
1830                }
1831            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
1832              break;  /* Out of the /g loop */
1833              }
1834            }
1835    
1836          /* If not /g or /G we are done */
1837    
1838          if (!do_g && !do_G) break;
1839    
1840          /* If we have matched an empty string, first check to see if we are at
1841          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1842          what Perl's /g options does. This turns out to be rather cunning. First
1843          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1844          same point. If this fails (picked up above) we advance to the next
1845          character. */
1846    
1847          g_notempty = 0;
1848          if (use_offsets[0] == use_offsets[1])
1849            {
1850            if (use_offsets[0] == len) break;
1851            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1852          }          }
1853        }  
1854      }        /* For /g, update the start offset, leaving the rest alone */
1855    
1856          if (do_g) start_offset = use_offsets[1];
1857    
1858          /* For /G, update the pointer and length */
1859    
1860          else
1861            {
1862            bptr += use_offsets[1];
1863            len -= use_offsets[1];
1864            }
1865          }  /* End of loop for /g and /G */
1866        }    /* End of loop for data lines */
1867    
1868    CONTINUE:    CONTINUE:
1869    
1870    #if !defined NOPOSIX
1871    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1872    if (re != NULL) free(re);  #endif
1873    if (extra != NULL) free(extra);  
1874      if (re != NULL) new_free(re);
1875      if (extra != NULL) new_free(extra);
1876    if (tables != NULL)    if (tables != NULL)
1877      {      {
1878      free((void *)tables);      new_free((void *)tables);
1879      setlocale(LC_CTYPE, "C");      setlocale(LC_CTYPE, "C");
1880      }      }
1881    }    }
1882    
1883  fprintf(outfile, "\n");  if (infile == stdin) fprintf(outfile, "\n");
1884  return 0;  
1885    EXIT:
1886    
1887    if (infile != NULL && infile != stdin) fclose(infile);
1888    if (outfile != NULL && outfile != stdout) fclose(outfile);
1889    
1890    free(buffer);
1891    free(dbuffer);
1892    free(pbuffer);
1893    free(offsets);
1894    
1895    return yield;
1896  }  }
1897    
1898  /* End */  /* End of pcretest.c */

Legend:
Removed from v.27  
changed lines
  Added in v.85

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12