/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC
# Line 2  Line 2 
2  *             PCRE testing program               *  *             PCRE testing program               *
3  *************************************************/  *************************************************/
4    
5    /* This program was hacked up as a tester for PCRE. I really should have
6    written it more tidily in the first place. Will I ever learn? It has grown and
7    been extended and consequently is now rather, er, *very* untidy in places.
8    
9    -----------------------------------------------------------------------------
10    Redistribution and use in source and binary forms, with or without
11    modification, are permitted provided that the following conditions are met:
12    
13        * Redistributions of source code must retain the above copyright notice,
14          this list of conditions and the following disclaimer.
15    
16        * Redistributions in binary form must reproduce the above copyright
17          notice, this list of conditions and the following disclaimer in the
18          documentation and/or other materials provided with the distribution.
19    
20        * Neither the name of the University of Cambridge nor the names of its
21          contributors may be used to endorse or promote products derived from
22          this software without specific prior written permission.
23    
24    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25    AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26    IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27    ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28    LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29    CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30    SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31    INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32    CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33    ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34    POSSIBILITY OF SUCH DAMAGE.
35    -----------------------------------------------------------------------------
36    */
37    
38    
#include <ctype.h>
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include <time.h>
#include <locale.h>
#include <errno.h>
#include <limits.h>
46    
47    #ifndef _WIN32
48    #include <sys/resource.h>
49    #endif
50    
51    #define PCRE_SPY        /* For Win32 build, import data, not export */
52    
53    /* We include pcre_internal.h because we need the internal info for displaying
54    the results of pcre_study() and we also need to know about the internal
55    macros, structures, and other internal data values; pcretest has "inside
56    information" compared to a program that strictly follows the PCRE API. */
57    
58    #include "pcre_internal.h"
59    
60    /* We need access to the data tables that PCRE uses. So as not to have to keep
61    two copies, we include the source file here, changing the names of the external
62    symbols to prevent clashes. */
63    
64    #define _pcre_utf8_table1      utf8_table1
65    #define _pcre_utf8_table1_size utf8_table1_size
66    #define _pcre_utf8_table2      utf8_table2
67    #define _pcre_utf8_table3      utf8_table3
68    #define _pcre_utf8_table4      utf8_table4
69    #define _pcre_utt              utt
70    #define _pcre_utt_size         utt_size
71    #define _pcre_OP_lengths       OP_lengths
72    
73    #include "pcre_tables.c"
74    
75    /* We also need the pcre_printint() function for printing out compiled
76    patterns. This function is in a separate file so that it can be included in
77    pcre_compile.c when that module is compiled with debugging enabled. */
78    
79    #include "pcre_printint.src"
80    
 /* Use the internal info for displaying the results of pcre_study(). */  
81    
82  #include "internal.h"  /* It is possible to compile this test program without including support for
83    testing the POSIX interface, though this is not available via the standard
84    Makefile. */
85    
86    #if !defined NOPOSIX
87  #include "pcreposix.h"  #include "pcreposix.h"
88    #endif
89    
90    /* It is also possible, for the benefit of the version imported into Exim, to
91    build pcretest without support for UTF8 (define NOUTF8), without the interface
92    to the DFA matcher (NODFA), and without the doublecheck of the old "info"
93    function (define NOINFOCHECK). */
94    
95    
96    /* Other parameters */
97    
98  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
99  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 21  Line 103 
103  #endif  #endif
104  #endif  #endif
105    
106    #define LOOPREPEAT 500000
107    
108    /* Static variables */
109    
110  static FILE *outfile;  static FILE *outfile;
111  static int log_store = 0;  static int log_store = 0;
112    static int callout_count;
113    static int callout_extra;
114    static int callout_fail_count;
115    static int callout_fail_id;
116    static int first_callout;
117    static int show_malloc;
118    static int use_utf8;
119    static size_t gotten_store;
120    
121    /* The buffers grow automatically if very long input lines are encountered. */
122    
123    static int buffer_size = 50000;
124    static uschar *buffer = NULL;
125    static uschar *dbuffer = NULL;
126    static uschar *pbuffer = NULL;
127    
128    
129    
130  /* Debugging function to print the internal form of the regex. This is the same  /*************************************************
131  code as contained in pcre.c under the DEBUG macro. */  *        Read or extend an input line            *
132    *************************************************/
133    
134  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  /* Input lines are read into buffer, but both patterns and data lines can be
135    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",  continued over multiple input lines. In addition, if the buffer fills up, we
136    "not",  want to automatically expand it so as to be able to handle extremely large
137    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  lines that are needed for certain stress tests. When the input buffer is
138    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  expanded, the other two buffers must also be expanded likewise, and the
139    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",  contents of pbuffer, which are a copy of the input for callouts, must be
140    "*", "*?", "+", "+?", "?", "??", "{", "{",  preserved (for when expansion happens for a data line). This is not the most
141    "class", "Ref",  optimal way of handling this, but hey, this is just a test program!
142    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",  
143    "Brazero", "Braminzero", "Bra"  Arguments:
144  };    f            the file to read
145      start        where in buffer to start (this *must* be within buffer)
146    
147  static void print_internals(pcre *re)  Returns:       pointer to the start of new data
148  {                 could be a copy of start, or could be moved
149  unsigned char *code = ((real_pcre *)re)->code;                 NULL if no data read and EOF reached
150    */
 printf("------------------------------------------------------------------\n");  
   
 for(;;)  
   {  
   int c;  
   int charlength;  
   
   printf("%3d ", code - ((real_pcre *)re)->code);  
   
   if (*code >= OP_BRA)  
     {  
     printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA);  
     code += 2;  
     }  
   
   else switch(*code)  
     {  
     case OP_END:  
     printf("    %s\n", OP_names[*code]);  
     printf("------------------------------------------------------------------\n");  
     return;  
   
     case OP_CHARS:  
     charlength = *(++code);  
     printf("%3d ", charlength);  
     while (charlength-- > 0)  
       if (isprint(c = *(++code))) printf("%c", c); else printf("\\x%02x", c);  
     break;  
   
     case OP_KETRMAX:  
     case OP_KETRMIN:  
     case OP_ALT:  
     case OP_KET:  
     case OP_ASSERT:  
     case OP_ASSERT_NOT:  
     case OP_ONCE:  
     printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);  
     code += 2;  
     break;  
   
     case OP_STAR:  
     case OP_MINSTAR:  
     case OP_PLUS:  
     case OP_MINPLUS:  
     case OP_QUERY:  
     case OP_MINQUERY:  
     case OP_TYPESTAR:  
     case OP_TYPEMINSTAR:  
     case OP_TYPEPLUS:  
     case OP_TYPEMINPLUS:  
     case OP_TYPEQUERY:  
     case OP_TYPEMINQUERY:  
     if (*code >= OP_TYPESTAR)  
       printf("    %s", OP_names[code[1]]);  
     else if (isprint(c = code[1])) printf("    %c", c);  
       else printf("    \\x%02x", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_EXACT:  
     case OP_UPTO:  
     case OP_MINUPTO:  
     if (isprint(c = code[3])) printf("    %c{", c);  
       else printf("    \\x%02x{", c);  
     if (*code != OP_EXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_MINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_TYPEEXACT:  
     case OP_TYPEUPTO:  
     case OP_TYPEMINUPTO:  
     printf("    %s{", OP_names[code[3]]);  
     if (*code != OP_TYPEEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_TYPEMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_NOT:  
     if (isprint(c = *(++code))) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     break;  
   
     case OP_NOTSTAR:  
     case OP_NOTMINSTAR:  
     case OP_NOTPLUS:  
     case OP_NOTMINPLUS:  
     case OP_NOTQUERY:  
     case OP_NOTMINQUERY:  
     if (isprint(c = code[1])) printf("    [^%c]", c);  
       else printf("    [^\\x%02x]", c);  
     printf("%s", OP_names[*code++]);  
     break;  
   
     case OP_NOTEXACT:  
     case OP_NOTUPTO:  
     case OP_NOTMINUPTO:  
     if (isprint(c = code[3])) printf("    [^%c]{", c);  
       else printf("    [^\\x%02x]{", c);  
     if (*code != OP_NOTEXACT) printf(",");  
     printf("%d}", (code[1] << 8) + code[2]);  
     if (*code == OP_NOTMINUPTO) printf("?");  
     code += 3;  
     break;  
   
     case OP_REF:  
     printf("    \\%d", *(++code));  
     break;  
   
     case OP_CLASS:  
       {  
       int i, min, max;  
   
       code++;  
       printf("    [");  
   
       for (i = 0; i < 256; i++)  
         {  
         if ((code[i/8] & (1 << (i&7))) != 0)  
           {  
           int j;  
           for (j = i+1; j < 256; j++)  
             if ((code[j/8] & (1 << (j&7))) == 0) break;  
           if (i == '-' || i == ']') printf("\\");  
           if (isprint(i)) printf("%c", i); else printf("\\x%02x", i);  
           if (--j > i)  
             {  
             printf("-");  
             if (j == '-' || j == ']') printf("\\");  
             if (isprint(j)) printf("%c", j); else printf("\\x%02x", j);  
             }  
           i = j;  
           }  
         }  
       printf("]");  
       code += 32;  
   
       switch(*code)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRPLUS:  
         case OP_CRMINPLUS:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         printf("%s", OP_names[*code]);  
         break;  
151    
152          case OP_CRRANGE:  static uschar *
153          case OP_CRMINRANGE:  extend_inputline(FILE *f, uschar *start)
154          min = (code[1] << 8) + code[2];  {
155          max = (code[3] << 8) + code[4];  uschar *here = start;
         if (max == 0) printf("{%d,}", min);  
         else printf("{%d,%d}", min, max);  
         if (*code == OP_CRMINRANGE) printf("?");  
         code += 4;  
         break;  
156    
157          default:  for (;;)
158          code--;    {
159          }    int rlen = buffer_size - (here - buffer);
160      if (rlen > 1000)
161        {
162        int dlen;
163        if (fgets((char *)here, rlen,  f) == NULL)
164          return (here == start)? NULL : start;
165        dlen = (int)strlen((char *)here);
166        if (dlen > 0 && here[dlen - 1] == '\n') return start;
167        here += dlen;
168        }
169    
170      else
171        {
172        int new_buffer_size = 2*buffer_size;
173        uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
174        uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
175        uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
176    
177        if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
178          {
179          fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
180          exit(1);
181        }        }
     break;  
182    
183      /* Anything else is just a one-node item */      memcpy(new_buffer, buffer, buffer_size);
184        memcpy(new_pbuffer, pbuffer, buffer_size);
185    
186        buffer_size = new_buffer_size;
187    
188        start = new_buffer + (start - buffer);
189        here = new_buffer + (here - buffer);
190    
191        free(buffer);
192        free(dbuffer);
193        free(pbuffer);
194    
195      default:      buffer = new_buffer;
196      printf("    %s", OP_names[*code]);      dbuffer = new_dbuffer;
197      break;      pbuffer = new_pbuffer;
198      }      }
199      }
200    
201    return NULL;  /* Control never gets here */
202    }
203    
204    
205    
206    
207    
208    
209    
210    /*************************************************
211    *          Read number from string               *
212    *************************************************/
213    
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. Leading white
space is skipped, then a run of decimal digits is converted. A value too large
for an int saturates at INT_MAX instead of overflowing (signed overflow is
undefined behaviour); all the digits are still consumed so that the caller's
"*endptr == 0" check behaves as before.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (first unconverted character)

Returns:        the value as a non-negative int (0 if there are no digits)
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');
  if (result > (INT_MAX - digit) / 10)
    result = INT_MAX;                  /* would overflow: clamp */
  else
    result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
234    
235    
236    
237    
238    /*************************************************
239    *            Convert UTF-8 string to value       *
240    *************************************************/
241    
242    /* This function takes one or more bytes that represents a UTF-8 character,
243    and returns the value of the character.
244    
245    Argument:
246      utf8bytes   a pointer to the byte vector
247      vptr        a pointer to an int to receive the value
248    
249    Returns:      >  0 => the number of bytes consumed
250                  -6 to 0 => malformed UTF-8 character at offset = (-return)
251    */
252    
253    #if !defined NOUTF8
254    
255    static int
256    utf82ord(unsigned char *utf8bytes, int *vptr)
257    {
258    int c = *utf8bytes++;
259    int d = c;
260    int i, j, s;
261    
262    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
263      {
264      if ((d & 0x80) == 0) break;
265      d <<= 1;
266      }
267    
268    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
269    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
270    
271    /* i now has a value in the range 1-5 */
272    
273    s = 6*i;
274    d = (c & utf8_table3[i]) << s;
275    
276    for (j = 0; j < i; j++)
277      {
278      c = *utf8bytes++;
279      if ((c & 0xc0) != 0x80) return -(j+1);
280      s -= 6;
281      d |= (c & 0x3f) << s;
282    }    }
283    
284    /* Check that encoding was the correct unique one */
285    
286    for (j = 0; j < utf8_table1_size; j++)
287      if (d <= utf8_table1[j]) break;
288    if (j != i) return -(i+1);
289    
290    /* Valid value */
291    
292    *vptr = d;
293    return i+1;
294    }
295    
296    #endif
297    
298    
299    
300    /*************************************************
301    *       Convert character value to UTF-8         *
302    *************************************************/
303    
304    /* This function takes an integer value in the range 0 - 0x7fffffff
305    and encodes it as a UTF-8 character in 0 to 6 bytes.
306    
307    Arguments:
308      cvalue     the character value
309      utf8bytes  pointer to buffer for result - at least 6 bytes long
310    
311    Returns:     number of characters placed in the buffer
312    */
313    
314    static int
315    ord2utf8(int cvalue, uschar *utf8bytes)
316    {
317    register int i, j;
318    for (i = 0; i < utf8_table1_size; i++)
319      if (cvalue <= utf8_table1[i]) break;
320    utf8bytes += i;
321    for (j = i; j > 0; j--)
322     {
323     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
324     cvalue >>= 6;
325     }
326    *utf8bytes = utf8_table2[i] | cvalue;
327    return i + 1;
328  }  }
329    
330    
331    
332  /* Character string printing function. */  /*************************************************
333    *             Print character string             *
334    *************************************************/
335    
336    /* Character string printing function. Must handle UTF-8 strings in utf8
337    mode. Yields number of characters printed. If handed a NULL file, just counts
338    chars without printing. */
339    
340  static void pchars(unsigned char *p, int length)  static int pchars(unsigned char *p, int length, FILE *f)
341  {  {
342  int c;  int c = 0;
343    int yield = 0;
344    
345  while (length-- > 0)  while (length-- > 0)
346    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    {
347      else fprintf(outfile, "\\x%02x", c);  #if !defined NOUTF8
348      if (use_utf8)
349        {
350        int rc = utf82ord(p, &c);
351    
352        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
353          {
354          length -= rc - 1;
355          p += rc;
356          if (c < 256 && isprint(c))
357            {
358            if (f != NULL) fprintf(f, "%c", c);
359            yield++;
360            }
361          else
362            {
363            int n;
364            if (f != NULL) fprintf(f, "\\x{%02x}%n", c, &n);
365            yield += n;
366            }
367          continue;
368          }
369        }
370    #endif
371    
372       /* Not UTF-8, or malformed UTF-8  */
373    
374      if (isprint(c = *(p++)))
375        {
376        if (f != NULL) fprintf(f, "%c", c);
377        yield++;
378        }
379      else
380        {
381        if (f != NULL) fprintf(f, "\\x%02x", c);
382        yield += 4;
383        }
384      }
385    
386    return yield;
387  }  }
388    
389    
390    
391    /*************************************************
392    *              Callout function                  *
393    *************************************************/
394    
395    /* Called from PCRE as a result of the (?C) item. We print out where we are in
396    the match. Yield zero unless more callouts than the fail count, or the callout
397    data is not zero. */
398    
399    static int callout(pcre_callout_block *cb)
400    {
401    FILE *f = (first_callout | callout_extra)? outfile : NULL;
402    int i, pre_start, post_start, subject_length;
403    
404    if (callout_extra)
405      {
406      fprintf(f, "Callout %d: last capture = %d\n",
407        cb->callout_number, cb->capture_last);
408    
409      for (i = 0; i < cb->capture_top * 2; i += 2)
410        {
411        if (cb->offset_vector[i] < 0)
412          fprintf(f, "%2d: <unset>\n", i/2);
413        else
414          {
415          fprintf(f, "%2d: ", i/2);
416          (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
417            cb->offset_vector[i+1] - cb->offset_vector[i], f);
418          fprintf(f, "\n");
419          }
420        }
421      }
422    
423    /* Re-print the subject in canonical form, the first time or if giving full
424    datails. On subsequent calls in the same match, we use pchars just to find the
425    printed lengths of the substrings. */
426    
427    if (f != NULL) fprintf(f, "--->");
428    
429    pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
430    post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
431      cb->current_position - cb->start_match, f);
432    
433    subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
434    
435    (void)pchars((unsigned char *)(cb->subject + cb->current_position),
436      cb->subject_length - cb->current_position, f);
437    
438    if (f != NULL) fprintf(f, "\n");
439    
440    /* Always print appropriate indicators, with callout number if not already
441    shown. For automatic callouts, show the pattern offset. */
442    
443    if (cb->callout_number == 255)
444      {
445      fprintf(outfile, "%+3d ", cb->pattern_position);
446      if (cb->pattern_position > 99) fprintf(outfile, "\n    ");
447      }
448    else
449      {
450      if (callout_extra) fprintf(outfile, "    ");
451        else fprintf(outfile, "%3d ", cb->callout_number);
452      }
453    
454    for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
455    fprintf(outfile, "^");
456    
457    if (post_start > 0)
458      {
459      for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
460      fprintf(outfile, "^");
461      }
462    
463    for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
464      fprintf(outfile, " ");
465    
466    fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
467      pbuffer + cb->pattern_position);
468    
469    fprintf(outfile, "\n");
470    first_callout = 0;
471    
472    if (cb->callout_data != NULL)
473      {
474      int callout_data = *((int *)(cb->callout_data));
475      if (callout_data != 0)
476        {
477        fprintf(outfile, "Callout data = %d\n", callout_data);
478        return callout_data;
479        }
480      }
481    
482    return (cb->callout_number != callout_fail_id)? 0 :
483           (++callout_count >= callout_fail_count)? 1 : 0;
484    }
485    
486    
487    /*************************************************
488    *            Local malloc functions              *
489    *************************************************/
490    
491  /* Alternative malloc function, to test functionality and show the size of the  /* Alternative malloc function, to test functionality and show the size of the
492  compiled re. */  compiled re. */
493    
494  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
495  {  {
496  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  void *block = malloc(size);
497  return malloc(size);  gotten_store = size;
498    if (show_malloc)
499      fprintf(outfile, "malloc       %3d %p\n", (int)size, block);
500    return block;
501    }
502    
503    static void new_free(void *block)
504    {
505    if (show_malloc)
506      fprintf(outfile, "free             %p\n", block);
507    free(block);
508    }
509    
510    
511    /* For recursion malloc/free, to test stacking calls */
512    
513    static void *stack_malloc(size_t size)
514    {
515    void *block = malloc(size);
516    if (show_malloc)
517      fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
518    return block;
519    }
520    
521    static void stack_free(void *block)
522    {
523    if (show_malloc)
524      fprintf(outfile, "stack_free       %p\n", block);
525    free(block);
526  }  }
527    
528    
529    /*************************************************
530    *          Call pcre_fullinfo()                  *
531    *************************************************/
532    
533    /* Get one piece of information from the pcre_fullinfo() function */
534    
535    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
536    {
537    int rc;
538    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
539      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
540    }
541    
542    
543    
544    /*************************************************
545    *         Byte flipping function                 *
546    *************************************************/
547    
/* Reverse the byte order of the low 2 bytes (n == 2) or the low 4 bytes (any
other n) of value. Bits above the flipped field do not appear in the result. */

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
557    
558    
559    
560    
561    /*************************************************
562    *        Check match or recursion limit          *
563    *************************************************/
564    
565    static int
566    check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
567      int start_offset, int options, int *use_offsets, int use_size_offsets,
568      int flag, unsigned long int *limit, int errnumber, const char *msg)
569    {
570    int count;
571    int min = 0;
572    int mid = 64;
573    int max = -1;
574    
575    extra->flags |= flag;
576    
577    for (;;)
578      {
579      *limit = mid;
580    
581      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
582        use_offsets, use_size_offsets);
583    
584      if (count == errnumber)
585        {
586        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
587        min = mid;
588        mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
589        }
590    
591      else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
592                             count == PCRE_ERROR_PARTIAL)
593        {
594        if (mid == min + 1)
595          {
596          fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
597          break;
598          }
599        /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
600        max = mid;
601        mid = (min + mid)/2;
602        }
603      else break;    /* Some other error */
604      }
605    
606    extra->flags &= ~flag;
607    return count;
608    }
609    
610    
611    
612    /*************************************************
613    *         Check newline indicator                *
614    *************************************************/
615    
616    /* This is used both at compile and run-time to check for <xxx> escapes, where
617    xxx is LF, CR, or CRLF. Print a message and return 0 if there is no match.
618    
619    Arguments:
620      p           points after the leading '<'
621      f           file for error message
622    
623    Returns:      appropriate PCRE_NEWLINE_xxx flags, or 0
624    */
625    
626    static int
627    check_newline(uschar *p, FILE *f)
628    {
629    if (strncmp((char *)p, "cr>", 3) == 0) return PCRE_NEWLINE_CR;
630    if (strncmp((char *)p, "lf>", 3) == 0) return PCRE_NEWLINE_LF;
631    if (strncmp((char *)p, "crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
632    fprintf(f, "Unknown newline type at: <%s\n", p);
633    return 0;
634    }
635    
636    
637    
638    /*************************************************
639    *                Main Program                    *
640    *************************************************/
641    
642  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
643  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
# Line 262  int study_options = 0; Line 651  int study_options = 0;
651  int op = 1;  int op = 1;
652  int timeit = 0;  int timeit = 0;
653  int showinfo = 0;  int showinfo = 0;
654    int showstore = 0;
655    int quiet = 0;
656    int size_offsets = 45;
657    int size_offsets_max;
658    int *offsets = NULL;
659    #if !defined NOPOSIX
660  int posix = 0;  int posix = 0;
661    #endif
662  int debug = 0;  int debug = 0;
663  unsigned char buffer[30000];  int done = 0;
664  unsigned char dbuffer[1024];  int all_use_dfa = 0;
665    int yield = 0;
666  /* Static so that new_malloc can use it. */  int stack_size;
667    
668    /* These vectors store, end-to-end, a list of captured substring names. Assume
669    that 1024 is plenty long enough for the few names we'll be testing. */
670    
671    uschar copynames[1024];
672    uschar getnames[1024];
673    
674    uschar *copynamesptr;
675    uschar *getnamesptr;
676    
677    /* Get buffers from malloc() so that Electric Fence will check their misuse
678    when I am debugging. They grow automatically when very long lines are read. */
679    
680    buffer = (unsigned char *)malloc(buffer_size);
681    dbuffer = (unsigned char *)malloc(buffer_size);
682    pbuffer = (unsigned char *)malloc(buffer_size);
683    
684    /* The outfile variable is static so that new_malloc can use it. The _setmode()
685    stuff is some magic that I don't understand, but which apparently does good
686    things in Windows. It's related to line terminations.  */
687    
688    #if defined(_WIN32) || defined(WIN32)
689    _setmode( _fileno( stdout ), 0x8000 );
690    #endif  /* defined(_WIN32) || defined(WIN32) */
691    
692  outfile = stdout;  outfile = stdout;
693    
# Line 275  outfile = stdout; Line 695  outfile = stdout;
695    
696  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
697    {    {
698    if (strcmp(argv[op], "-s") == 0) log_store = 1;    unsigned char *endptr;
699    
700      if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
701        showstore = 1;
702    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
703      else if (strcmp(argv[op], "-q") == 0) quiet = 1;
704    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
705    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
706    #if !defined NODFA
707      else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
708    #endif
709      else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
710          ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
711            *endptr == 0))
712        {
713        op++;
714        argc--;
715        }
716      else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
717          ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
718            *endptr == 0))
719        {
720    #ifdef _WIN32
721        printf("PCRE: -S not supported on this OS\n");
722        exit(1);
723    #else
724        int rc;
725        struct rlimit rlim;
726        getrlimit(RLIMIT_STACK, &rlim);
727        rlim.rlim_cur = stack_size * 1024 * 1024;
728        rc = setrlimit(RLIMIT_STACK, &rlim);
729        if (rc != 0)
730          {
731        printf("PCRE: setrlimit() failed with error %d\n", rc);
732        exit(1);
733          }
734        op++;
735        argc--;
736    #endif
737        }
738    #if !defined NOPOSIX
739    else if (strcmp(argv[op], "-p") == 0) posix = 1;    else if (strcmp(argv[op], "-p") == 0) posix = 1;
740    #endif
741      else if (strcmp(argv[op], "-C") == 0)
742        {
743        int rc;
744        printf("PCRE version %s\n", pcre_version());
745        printf("Compiled with\n");
746        (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
747        printf("  %sUTF-8 support\n", rc? "" : "No ");
748        (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
749        printf("  %sUnicode properties support\n", rc? "" : "No ");
750        (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
751        printf("  Newline sequence is %s\n", (rc == '\r')? "CR" :
752          (rc == '\n')? "LF" : "CRLF");
753        (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
754        printf("  Internal link size = %d\n", rc);
755        (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
756        printf("  POSIX malloc threshold = %d\n", rc);
757        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
758        printf("  Default match limit = %d\n", rc);
759        (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
760        printf("  Default recursion depth limit = %d\n", rc);
761        (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
762        printf("  Match recursion uses %s\n", rc? "stack" : "heap");
763        exit(0);
764        }
765    else    else
766      {      {
767      printf("*** Unknown option %s\n", argv[op]);      printf("** Unknown or malformed option %s\n", argv[op]);
768      return 1;      printf("Usage:   pcretest [options] [<input> [<output>]]\n");
769        printf("  -C     show PCRE compile-time options and exit\n");
770        printf("  -d     debug: show compiled code; implies -i\n");
771    #if !defined NODFA
772        printf("  -dfa   force DFA matching for all subjects\n");
773    #endif
774        printf("  -i     show information about compiled pattern\n"
775               "  -m     output memory used information\n"
776               "  -o <n> set size of offsets vector to <n>\n");
777    #if !defined NOPOSIX
778        printf("  -p     use POSIX interface\n");
779    #endif
780        printf("  -S <n> set stack size to <n> megabytes\n");
781        printf("  -s     output store (memory) used information\n"
782               "  -t     time compilation and execution\n");
783        yield = 1;
784        goto EXIT;
785      }      }
786    op++;    op++;
787    argc--;    argc--;
788    }    }
789    
790    /* Get the store for the offsets vector, and remember what it was */
791    
792    size_offsets_max = size_offsets;
793    offsets = (int *)malloc(size_offsets_max * sizeof(int));
794    if (offsets == NULL)
795      {
796      printf("** Failed to get %d bytes of memory for offsets vector\n",
797        size_offsets_max * sizeof(int));
798      yield = 1;
799      goto EXIT;
800      }
801    
802  /* Sort out the input and output files */  /* Sort out the input and output files */
803    
804  if (argc > 1)  if (argc > 1)
805    {    {
806    infile = fopen(argv[op], "r");    infile = fopen(argv[op], "rb");
807    if (infile == NULL)    if (infile == NULL)
808      {      {
809      printf("** Failed to open %s\n", argv[op]);      printf("** Failed to open %s\n", argv[op]);
810      return 1;      yield = 1;
811        goto EXIT;
812      }      }
813    }    }
814    
815  if (argc > 2)  if (argc > 2)
816    {    {
817    outfile = fopen(argv[op+1], "w");    outfile = fopen(argv[op+1], "wb");
818    if (outfile == NULL)    if (outfile == NULL)
819      {      {
820      printf("** Failed to open %s\n", argv[op+1]);      printf("** Failed to open %s\n", argv[op+1]);
821      return 1;      yield = 1;
822        goto EXIT;
823      }      }
824    }    }
825    
826  /* Set alternative malloc function */  /* Set alternative malloc function */
827    
828  pcre_malloc = new_malloc;  pcre_malloc = new_malloc;
829    pcre_free = new_free;
830    pcre_stack_malloc = stack_malloc;
831    pcre_stack_free = stack_free;
832    
833  /* Heading line, then prompt for first re if stdin */  /* Heading line unless quiet, then prompt for first regex if stdin */
834    
835  fprintf(outfile, "Testing Perl-Compatible Regular Expressions\n");  if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
 fprintf(outfile, "PCRE version %s\n\n", pcre_version());  
836    
837  /* Main loop */  /* Main loop */
838    
839  for (;;)  while (!done)
840    {    {
841    pcre *re = NULL;    pcre *re = NULL;
842    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
843    
844    #if !defined NOPOSIX  /* There are still compilers that require no indent */
845    regex_t preg;    regex_t preg;
   char *error;  
   unsigned char *p, *pp;  
   int do_study = 0;  
   int do_debug = 0;  
846    int do_posix = 0;    int do_posix = 0;
847    #endif
848    
849      const char *error;
850      unsigned char *p, *pp, *ppp;
851      unsigned char *to_file = NULL;
852      const unsigned char *tables = NULL;
853      unsigned long int true_size, true_study_size = 0;
854      size_t size, regex_gotten_store;
855      int do_study = 0;
856      int do_debug = debug;
857      int do_G = 0;
858      int do_g = 0;
859      int do_showinfo = showinfo;
860      int do_showrest = 0;
861      int do_flip = 0;
862    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
863    
864      use_utf8 = 0;
865    
866    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
867    if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) break;    if (extend_inputline(infile, buffer) == NULL) break;
868    if (infile != stdin) fprintf(outfile, (char *)buffer);    if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
869      fflush(outfile);
870    
871    p = buffer;    p = buffer;
872    while (isspace(*p)) p++;    while (isspace(*p)) p++;
873    if (*p == 0) continue;    if (*p == 0) continue;
874    
875    /* Get the delimiter and seek the end of the pattern; if is isn't    /* See if the pattern is to be loaded pre-compiled from a file. */
876    complete, read more. */  
877      if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
878        {
879        unsigned long int magic, get_options;
880        uschar sbuf[8];
881        FILE *f;
882    
883        p++;
884        pp = p + (int)strlen((char *)p);
885        while (isspace(pp[-1])) pp--;
886        *pp = 0;
887    
888        f = fopen((char *)p, "rb");
889        if (f == NULL)
890          {
891          fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
892          continue;
893          }
894    
895        if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
896    
897        true_size =
898          (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
899        true_study_size =
900          (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
901    
902        re = (real_pcre *)new_malloc(true_size);
903        regex_gotten_store = gotten_store;
904    
905        if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
906    
907        magic = ((real_pcre *)re)->magic_number;
908        if (magic != MAGIC_NUMBER)
909          {
910          if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
911            {
912            do_flip = 1;
913            }
914          else
915            {
916            fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
917            fclose(f);
918            continue;
919            }
920          }
921    
922        fprintf(outfile, "Compiled regex%s loaded from %s\n",
923          do_flip? " (byte-inverted)" : "", p);
924    
925        /* Need to know if UTF-8 for printing data strings */
926    
927        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
928        use_utf8 = (get_options & PCRE_UTF8) != 0;
929    
930        /* Now see if there is any following study data */
931    
932        if (true_study_size != 0)
933          {
934          pcre_study_data *psd;
935    
936          extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
937          extra->flags = PCRE_EXTRA_STUDY_DATA;
938    
939          psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
940          extra->study_data = psd;
941    
942          if (fread(psd, 1, true_study_size, f) != true_study_size)
943            {
944            FAIL_READ:
945            fprintf(outfile, "Failed to read data from %s\n", p);
946            if (extra != NULL) new_free(extra);
947            if (re != NULL) new_free(re);
948            fclose(f);
949            continue;
950            }
951          fprintf(outfile, "Study data loaded from %s\n", p);
952          do_study = 1;     /* To get the data output if requested */
953          }
954        else fprintf(outfile, "No study data\n");
955    
956        fclose(f);
957        goto SHOW_INFO;
958        }
959    
960      /* In-line pattern (the usual case). Get the delimiter and seek the end of
961      the pattern; if it isn't complete, read more. */
962    
963    delimiter = *p++;    delimiter = *p++;
964    
965    if (isalnum(delimiter))    if (isalnum(delimiter) || delimiter == '\\')
966      {      {
967      fprintf(outfile, "** Delimiter must not be alphameric\n");      fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
968      goto SKIP_DATA;      goto SKIP_DATA;
969      }      }
970    
# Line 357  for (;;) Line 972  for (;;)
972    
973    for(;;)    for(;;)
974      {      {
975      while (*pp != 0 && *pp != delimiter) pp++;      while (*pp != 0)
     if (*pp != 0) break;  
   
     len = sizeof(buffer) - (pp - buffer);  
     if (len < 256)  
976        {        {
977        fprintf(outfile, "** Expression too long - missing delimiter?\n");        if (*pp == '\\' && pp[1] != 0) pp++;
978        goto SKIP_DATA;          else if (*pp == delimiter) break;
979          pp++;
980        }        }
981        if (*pp != 0) break;
982      if (infile == stdin) printf("    > ");      if (infile == stdin) printf("    > ");
983      if (fgets((char *)pp, len, infile) == NULL)      if ((pp = extend_inputline(infile, pp)) == NULL)
984        {        {
985        fprintf(outfile, "** Unexpected EOF\n");        fprintf(outfile, "** Unexpected EOF\n");
986        goto END_OFF;        done = 1;
987          goto CONTINUE;
988        }        }
989      if (infile != stdin) fprintf(outfile, (char *)pp);      if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
990      }      }
991    
992    /* Terminate the pattern at the delimiter */    /* If the first character after the delimiter is backslash, make
993      the pattern end with backslash. This is purely to provide a way
994      of testing for the error message when a pattern ends with backslash. */
995    
996      if (pp[1] == '\\') *pp++ = '\\';
997    
998      /* Terminate the pattern at the delimiter, and save a copy of the pattern
999      for callouts. */
1000    
1001    *pp++ = 0;    *pp++ = 0;
1002      strcpy((char *)pbuffer, (char *)p);
1003    
1004    /* Look for options after final delimiter */    /* Look for options after final delimiter */
1005    
1006    options = 0;    options = 0;
1007    study_options = 0;    study_options = 0;
1008      log_store = showstore;  /* default from command line */
1009    
1010    while (*pp != 0)    while (*pp != 0)
1011      {      {
1012      switch (*pp++)      switch (*pp++)
1013        {        {
1014          case 'f': options |= PCRE_FIRSTLINE; break;
1015          case 'g': do_g = 1; break;
1016        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
1017        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
1018        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
1019        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
1020    
1021          case '+': do_showrest = 1; break;
1022        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
1023        case 'D': do_debug = 1; break;        case 'C': options |= PCRE_AUTO_CALLOUT; break;
1024          case 'D': do_debug = do_showinfo = 1; break;
1025        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1026          case 'F': do_flip = 1; break;
1027          case 'G': do_G = 1; break;
1028          case 'I': do_showinfo = 1; break;
1029          case 'J': options |= PCRE_DUPNAMES; break;
1030          case 'M': log_store = 1; break;
1031          case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1032    
1033    #if !defined NOPOSIX
1034        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
1035    #endif
1036    
1037        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
1038        case 'I': study_options |= PCRE_CASELESS; break;        case 'U': options |= PCRE_UNGREEDY; break;
1039        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
1040        case '\n': case ' ': break;        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1041          case '?': options |= PCRE_NO_UTF8_CHECK; break;
1042    
1043          case 'L':
1044          ppp = pp;
1045          /* The '\r' test here is so that it works on Windows */
1046          while (*ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1047          *ppp = 0;
1048          if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1049            {
1050            fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1051            goto SKIP_DATA;
1052            }
1053          tables = pcre_maketables();
1054          pp = ppp;
1055          break;
1056    
1057          case '>':
1058          to_file = pp;
1059          while (*pp != 0) pp++;
1060          while (isspace(pp[-1])) pp--;
1061          *pp = 0;
1062          break;
1063    
1064          case '<':
1065            {
1066            int x = check_newline(pp, outfile);
1067            if (x == 0) goto SKIP_DATA;
1068            options |= x;
1069            while (*pp++ != '>');
1070            }
1071          break;
1072    
1073          case '\r':                      /* So that it works in Windows */
1074          case '\n':
1075          case ' ':
1076          break;
1077    
1078        default:        default:
1079        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);        fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1080        goto SKIP_DATA;        goto SKIP_DATA;
1081        }        }
1082      }      }
1083    
1084    /* Handle compiing via the POSIX interface, which doesn't support the    /* Handle compiling via the POSIX interface, which doesn't support the
1085    timing, showing, or debugging options. */    timing, showing, or debugging options, nor the ability to pass over
1086      local character tables. */
1087    
1088    #if !defined NOPOSIX
1089    if (posix || do_posix)    if (posix || do_posix)
1090      {      {
1091      int rc;      int rc;
1092      int cflags = 0;      int cflags = 0;
1093    
1094      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;      if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1095      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;      if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1096        if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1097        if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1098        if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1099    
1100      rc = regcomp(&preg, (char *)p, cflags);      rc = regcomp(&preg, (char *)p, cflags);
1101    
1102      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
# Line 422  for (;;) Line 1104  for (;;)
1104    
1105      if (rc != 0)      if (rc != 0)
1106        {        {
1107        (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));        (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1108        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);        fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1109        goto SKIP_DATA;        goto SKIP_DATA;
1110        }        }
# Line 431  for (;;) Line 1113  for (;;)
1113    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
1114    
1115    else    else
1116    #endif  /* !defined NOPOSIX */
1117    
1118      {      {
1119      if (timeit)      if (timeit)
1120        {        {
1121        register int i;        register int i;
1122        clock_t time_taken;        clock_t time_taken;
1123        clock_t start_time = clock();        clock_t start_time = clock();
1124        for (i = 0; i < 4000; i++)        for (i = 0; i < LOOPREPEAT; i++)
1125          {          {
1126          re = pcre_compile((char *)p, options, &error, &erroroffset);          re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1127          if (re != NULL) free(re);          if (re != NULL) free(re);
1128          }          }
1129        time_taken = clock() - start_time;        time_taken = clock() - start_time;
1130        fprintf(outfile, "Compile time %.2f milliseconds\n",        fprintf(outfile, "Compile time %.3f milliseconds\n",
1131          ((double)time_taken)/(4 * CLOCKS_PER_SEC));          (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1132              (double)CLOCKS_PER_SEC);
1133        }        }
1134    
1135      re = pcre_compile((char *)p, options, &error, &erroroffset);      re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1136    
1137      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
1138      if non-interactive. */      if non-interactive. */
# Line 460  for (;;) Line 1145  for (;;)
1145          {          {
1146          for (;;)          for (;;)
1147            {            {
1148            if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)            if (extend_inputline(infile, buffer) == NULL)
1149              goto END_OFF;              {
1150                done = 1;
1151                goto CONTINUE;
1152                }
1153            len = (int)strlen((char *)buffer);            len = (int)strlen((char *)buffer);
1154            while (len > 0 && isspace(buffer[len-1])) len--;            while (len > 0 && isspace(buffer[len-1])) len--;
1155            if (len == 0) break;            if (len == 0) break;
1156            }            }
1157          fprintf(outfile, "\n");          fprintf(outfile, "\n");
1158          }          }
1159        continue;        goto CONTINUE;
1160        }        }
1161    
1162      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
1163        info-returning functions. The old one has a limited interface and
1164        returns only limited data. Check that it agrees with the newer one. */
1165    
1166        if (log_store)
1167          fprintf(outfile, "Memory allocation (code space): %d\n",
1168            (int)(gotten_store -
1169                  sizeof(real_pcre) -
1170                  ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1171    
1172        /* Extract the size for possible writing before possibly flipping it,
1173        and remember the store that was got. */
1174    
1175        true_size = ((real_pcre *)re)->size;
1176        regex_gotten_store = gotten_store;
1177    
1178        /* If /S was present, study the regexp to generate additional info to
1179        help with the matching. */
1180    
1181        if (do_study)
1182          {
1183          if (timeit)
1184            {
1185            register int i;
1186            clock_t time_taken;
1187            clock_t start_time = clock();
1188            for (i = 0; i < LOOPREPEAT; i++)
1189              extra = pcre_study(re, study_options, &error);
1190            time_taken = clock() - start_time;
1191            if (extra != NULL) free(extra);
1192            fprintf(outfile, "  Study time %.3f milliseconds\n",
1193              (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1194                (double)CLOCKS_PER_SEC);
1195            }
1196          extra = pcre_study(re, study_options, &error);
1197          if (error != NULL)
1198            fprintf(outfile, "Failed to study: %s\n", error);
1199          else if (extra != NULL)
1200            true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1201          }
1202    
1203        /* If the 'F' option was present, we flip the bytes of all the integer
1204        fields in the regex data block and the study block. This is to make it
1205        possible to test PCRE's handling of byte-flipped patterns, e.g. those
1206        compiled on a different architecture. */
1207    
1208        if (do_flip)
1209          {
1210          real_pcre *rre = (real_pcre *)re;
1211          rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1212          rre->size = byteflip(rre->size, sizeof(rre->size));
1213          rre->options = byteflip(rre->options, sizeof(rre->options));
1214          rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1215          rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1216          rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1217          rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1218          rre->name_table_offset = byteflip(rre->name_table_offset,
1219            sizeof(rre->name_table_offset));
1220          rre->name_entry_size = byteflip(rre->name_entry_size,
1221            sizeof(rre->name_entry_size));
1222          rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1223    
1224          if (extra != NULL)
1225            {
1226            pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1227            rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1228            rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1229            }
1230          }
1231    
1232        /* Extract information from the compiled data if required */
1233    
1234        SHOW_INFO:
1235    
1236        if (do_showinfo)
1237          {
1238          unsigned long int get_options, all_options;
1239    #if !defined NOINFOCHECK
1240          int old_first_char, old_options, old_count;
1241    #endif
1242          int count, backrefmax, first_char, need_char;
1243          int nameentrysize, namecount;
1244          const uschar *nametable;
1245    
1246          if (do_debug)
1247            {
1248            fprintf(outfile, "------------------------------------------------------------------\n");
1249            pcre_printint(re, outfile);
1250            }
1251    
1252          new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1253          new_info(re, NULL, PCRE_INFO_SIZE, &size);
1254          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1255          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1256          new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1257          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1258          new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1259          new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1260          new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1261    
1262    #if !defined NOINFOCHECK
1263          old_count = pcre_info(re, &old_options, &old_first_char);
1264          if (count < 0) fprintf(outfile,
1265            "Error %d from pcre_info()\n", count);
1266          else
1267            {
1268            if (old_count != count) fprintf(outfile,
1269              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1270                old_count);
1271    
1272            if (old_first_char != first_char) fprintf(outfile,
1273              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1274                first_char, old_first_char);
1275    
1276            if (old_options != (int)get_options) fprintf(outfile,
1277              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1278                get_options, old_options);
1279            }
1280    #endif
1281    
1282          if (size != regex_gotten_store) fprintf(outfile,
1283            "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1284            (int)size, (int)regex_gotten_store);
1285    
1286          fprintf(outfile, "Capturing subpattern count = %d\n", count);
1287          if (backrefmax > 0)
1288            fprintf(outfile, "Max back reference = %d\n", backrefmax);
1289    
1290          if (namecount > 0)
1291            {
1292            fprintf(outfile, "Named capturing subpatterns:\n");
1293            while (namecount-- > 0)
1294              {
1295              fprintf(outfile, "  %s %*s%3d\n", nametable + 2,
1296                nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1297                GET2(nametable, 0));
1298              nametable += nameentrysize;
1299              }
1300            }
1301    
1302          /* The NOPARTIAL bit is a private bit in the options, so we have
1303          to fish it out via our back door */
1304    
1305          all_options = ((real_pcre *)re)->options;
1306          if (do_flip)
1307            {
1308            all_options = byteflip(all_options, sizeof(all_options));
1309             }
1310    
1311          if ((all_options & PCRE_NOPARTIAL) != 0)
1312            fprintf(outfile, "Partial matching not supported\n");
1313    
1314          if (get_options == 0) fprintf(outfile, "No options\n");
1315            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1316              ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1317              ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1318              ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1319              ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1320              ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1321              ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1322              ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1323              ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1324              ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1325              ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1326              ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1327              ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1328              ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1329    
1330          switch (get_options & PCRE_NEWLINE_CRLF)
1331            {
1332            case PCRE_NEWLINE_CR:
1333            fprintf(outfile, "Forced newline sequence: CR\n");
1334            break;
1335    
1336            case PCRE_NEWLINE_LF:
1337            fprintf(outfile, "Forced newline sequence: LF\n");
1338            break;
1339    
1340            case PCRE_NEWLINE_CRLF:
1341            fprintf(outfile, "Forced newline sequence: CRLF\n");
1342            break;
1343    
1344            default:
1345            break;
1346            }
1347    
1348          if (first_char == -1)
1349            {
1350            fprintf(outfile, "First char at start or follows newline\n");
1351            }
1352          else if (first_char < 0)
1353            {
1354            fprintf(outfile, "No first char\n");
1355            }
1356          else
1357            {
1358            int ch = first_char & 255;
1359            const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1360              "" : " (caseless)";
1361            if (isprint(ch))
1362              fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1363            else
1364              fprintf(outfile, "First char = %d%s\n", ch, caseless);
1365            }
1366    
1367      if (showinfo || do_debug)        if (need_char < 0)
1368        {          {
1369        int first_char, count;          fprintf(outfile, "No need char\n");
1370            }
1371          else
1372            {
1373            int ch = need_char & 255;
1374            const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1375              "" : " (caseless)";
1376            if (isprint(ch))
1377              fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1378            else
1379              fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1380            }
1381    
1382        if (debug || do_debug) print_internals(re);        /* Don't output study size; at present it is in any case a fixed
1383          value, but it varies, depending on the computer architecture, and
1384          so messes up the test suite. (And with the /F option, it might be
1385          flipped.) */
1386    
1387        count = pcre_info(re, &options, &first_char);        if (do_study)
       if (count < 0) fprintf(outfile,  
         "Error %d while reading info\n", count);  
       else  
1388          {          {
1389          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (extra == NULL)
1390          if (options == 0) fprintf(outfile, "No options\n");            fprintf(outfile, "Study returned NULL\n");
           else fprintf(outfile, "Options:%s%s%s%s%s%s%s\n",  
             ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
             ((options & PCRE_CASELESS) != 0)? " caseless" : "",  
             ((options & PCRE_EXTENDED) != 0)? " extended" : "",  
             ((options & PCRE_MULTILINE) != 0)? " multiline" : "",  
             ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
             ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",  
             ((options & PCRE_EXTRA) != 0)? " extra" : "");  
         if (first_char == -1)  
           {  
           fprintf(outfile, "First char at start or follows \\n\n");  
           }  
         else if (first_char < 0)  
           {  
           fprintf(outfile, "No first char\n");  
           }  
1391          else          else
1392            {            {
1393            if (isprint(first_char))            uschar *start_bits = NULL;
1394              fprintf(outfile, "First char = \'%c\'\n", first_char);            new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1395    
1396              if (start_bits == NULL)
1397                fprintf(outfile, "No starting byte set\n");
1398            else            else
1399              fprintf(outfile, "First char = %d\n", first_char);              {
1400                int i;
1401                int c = 24;
1402                fprintf(outfile, "Starting byte set: ");
1403                for (i = 0; i < 256; i++)
1404                  {
1405                  if ((start_bits[i/8] & (1<<(i&7))) != 0)
1406                    {
1407                    if (c > 75)
1408                      {
1409                      fprintf(outfile, "\n  ");
1410                      c = 2;
1411                      }
1412                    if (isprint(i) && i != ' ')
1413                      {
1414                      fprintf(outfile, "%c ", i);
1415                      c += 2;
1416                      }
1417                    else
1418                      {
1419                      fprintf(outfile, "\\x%02x ", i);
1420                      c += 5;
1421                      }
1422                    }
1423                  }
1424                fprintf(outfile, "\n");
1425                }
1426            }            }
1427          }          }
1428        }        }
1429    
1430      /* If /S was present, study the regexp to generate additional info to      /* If the '>' option was present, we write out the regex to a file, and
1431      help with the matching. */      that is all. The first 8 bytes of the file are the regex length and then
1432        the study length, in big-endian order. */
1433    
1434      if (do_study)      if (to_file != NULL)
1435        {        {
1436        if (timeit)        FILE *f = fopen((char *)to_file, "wb");
1437          if (f == NULL)
1438          {          {
1439          register int i;          fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
         clock_t time_taken;  
         clock_t start_time = clock();  
         for (i = 0; i < 4000; i++)  
           extra = pcre_study(re, study_options, &error);  
         time_taken = clock() - start_time;  
         if (extra != NULL) free(extra);  
         fprintf(outfile, "  Study time %.2f milliseconds\n",  
           ((double)time_taken)/(4 * CLOCKS_PER_SEC));  
1440          }          }
1441          else
       extra = pcre_study(re, study_options, &error);  
       if (error != NULL)  
         fprintf(outfile, "Failed to study: %s\n", error);  
       else if (extra == NULL)  
         fprintf(outfile, "Study returned NULL\n");  
   
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
       else if (showinfo || do_debug)  
1442          {          {
1443          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar sbuf[8];
1444          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          sbuf[0] = (true_size >> 24)  & 255;
1445            fprintf(outfile, "No starting character set\n");          sbuf[1] = (true_size >> 16)  & 255;
1446            sbuf[2] = (true_size >>  8)  & 255;
1447            sbuf[3] = (true_size)  & 255;
1448    
1449            sbuf[4] = (true_study_size >> 24)  & 255;
1450            sbuf[5] = (true_study_size >> 16)  & 255;
1451            sbuf[6] = (true_study_size >>  8)  & 255;
1452            sbuf[7] = (true_study_size)  & 255;
1453    
1454            if (fwrite(sbuf, 1, 8, f) < 8 ||
1455                fwrite(re, 1, true_size, f) < true_size)
1456              {
1457              fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1458              }
1459          else          else
1460            {            {
1461            int i;            fprintf(outfile, "Compiled regex written to %s\n", to_file);
1462            int c = 24;            if (extra != NULL)
           fprintf(outfile, "Starting character set: ");  
           for (i = 0; i < 256; i++)  
1463              {              {
1464              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if (fwrite(extra->study_data, 1, true_study_size, f) <
1465                    true_study_size)
1466                {                {
1467                if (c > 75)                fprintf(outfile, "Write error on %s: %s\n", to_file,
1468                  {                  strerror(errno));
                 fprintf(outfile, "\n  ");  
                 c = 2;  
                 }  
               if (isprint(i) && i != ' ')  
                 {  
                 fprintf(outfile, "%c ", i);  
                 c += 2;  
                 }  
               else  
                 {  
                 fprintf(outfile, "\\x%02x ", i);  
                 c += 5;  
                 }  
1469                }                }
1470                else fprintf(outfile, "Study data written to %s\n", to_file);
1471              }              }
           fprintf(outfile, "\n");  
1472            }            }
1473            fclose(f);
1474          }          }
1475    
1476          new_free(re);
1477          if (extra != NULL) new_free(extra);
1478          if (tables != NULL) new_free((void *)tables);
1479          continue;  /* With next regex */
1480        }        }
1481      }      }        /* End of non-POSIX compile */
1482    
1483    /* Read data lines and test them */    /* Read data lines and test them */
1484    
1485    for (;;)    for (;;)
1486      {      {
1487      unsigned char *pp;      uschar *q;
1488        uschar *bptr = dbuffer;
1489        int *use_offsets = offsets;
1490        int use_size_offsets = size_offsets;
1491        int callout_data = 0;
1492        int callout_data_set = 0;
1493      int count, c;      int count, c;
1494      int offsets[30];      int copystrings = 0;
1495      int size_offsets = sizeof(offsets)/sizeof(int);      int find_match_limit = 0;
1496        int getstrings = 0;
1497        int getlist = 0;
1498        int gmatched = 0;
1499        int start_offset = 0;
1500        int g_notempty = 0;
1501        int use_dfa = 0;
1502    
1503      options = 0;      options = 0;
1504    
1505      if (infile == stdin) printf("  data> ");      *copynames = 0;
1506      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL) goto END_OFF;      *getnames = 0;
1507      if (infile != stdin) fprintf(outfile, (char *)buffer);  
1508        copynamesptr = copynames;
1509        getnamesptr = getnames;
1510    
1511        pcre_callout = callout;
1512        first_callout = 1;
1513        callout_extra = 0;
1514        callout_count = 0;
1515        callout_fail_count = 999999;
1516        callout_fail_id = -1;
1517        show_malloc = 0;
1518    
1519        if (extra != NULL) extra->flags &=
1520          ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1521    
1522        len = 0;
1523        for (;;)
1524          {
1525          if (infile == stdin) printf("data> ");
1526          if (extend_inputline(infile, buffer + len) == NULL)
1527            {
1528            if (len > 0) break;
1529            done = 1;
1530            goto CONTINUE;
1531            }
1532          if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1533          len = (int)strlen((char *)buffer);
1534          if (buffer[len-1] == '\n') break;
1535          }
1536    
     len = (int)strlen((char *)buffer);  
1537      while (len > 0 && isspace(buffer[len-1])) len--;      while (len > 0 && isspace(buffer[len-1])) len--;
1538      buffer[len] = 0;      buffer[len] = 0;
1539      if (len == 0) break;      if (len == 0) break;
# Line 599  for (;;) Line 1541  for (;;)
1541      p = buffer;      p = buffer;
1542      while (isspace(*p)) p++;      while (isspace(*p)) p++;
1543    
1544      pp = dbuffer;      q = dbuffer;
1545      while ((c = *p++) != 0)      while ((c = *p++) != 0)
1546        {        {
1547        int i = 0;        int i = 0;
1548        int n = 0;        int n = 0;
1549    
1550        if (c == '\\') switch ((c = *p++))        if (c == '\\') switch ((c = *p++))
1551          {          {
1552          case 'a': c =    7; break;          case 'a': c =    7; break;
# Line 620  for (;;) Line 1563  for (;;)
1563          c -= '0';          c -= '0';
1564          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')          while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1565            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
1566    
1567    #if !defined NOUTF8
1568            if (use_utf8 && c > 255)
1569              {
1570              unsigned char buff8[8];
1571              int ii, utn;
1572              utn = ord2utf8(c, buff8);
1573              for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1574              c = buff8[ii];   /* Last byte */
1575              }
1576    #endif
1577          break;          break;
1578    
1579          case 'x':          case 'x':
1580    
1581            /* Handle \x{..} specially - new Perl thing for utf8 */
1582    
1583    #if !defined NOUTF8
1584            if (*p == '{')
1585              {
1586              unsigned char *pt = p;
1587              c = 0;
1588              while (isxdigit(*(++pt)))
1589                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1590              if (*pt == '}')
1591                {
1592                unsigned char buff8[8];
1593                int ii, utn;
1594                utn = ord2utf8(c, buff8);
1595                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1596                c = buff8[ii];   /* Last byte */
1597                p = pt + 1;
1598                break;
1599                }
1600              /* Not correct form; fall through */
1601              }
1602    #endif
1603    
1604            /* Ordinary \x */
1605    
1606          c = 0;          c = 0;
1607          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
1608            {            {
# Line 631  for (;;) Line 1611  for (;;)
1611            }            }
1612          break;          break;
1613    
1614          case 0:   /* Allows for an empty line */          case 0:   /* \ followed by EOF allows for an empty line */
1615          p--;          p--;
1616          continue;          continue;
1617    
1618            case '>':
1619            while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1620            continue;
1621    
1622          case 'A':  /* Option setting */          case 'A':  /* Option setting */
1623          options |= PCRE_ANCHORED;          options |= PCRE_ANCHORED;
1624          continue;          continue;
# Line 643  for (;;) Line 1627  for (;;)
1627          options |= PCRE_NOTBOL;          options |= PCRE_NOTBOL;
1628          continue;          continue;
1629    
1630          case 'E':          case 'C':
1631          options |= PCRE_DOLLAR_ENDONLY;          if (isdigit(*p))    /* Set copy string */
1632              {
1633              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1634              copystrings |= 1 << n;
1635              }
1636            else if (isalnum(*p))
1637              {
1638              uschar *npp = copynamesptr;
1639              while (isalnum(*p)) *npp++ = *p++;
1640              *npp++ = 0;
1641              *npp = 0;
1642              n = pcre_get_stringnumber(re, (char *)copynamesptr);
1643              if (n < 0)
1644                fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1645              copynamesptr = npp;
1646              }
1647            else if (*p == '+')
1648              {
1649              callout_extra = 1;
1650              p++;
1651              }
1652            else if (*p == '-')
1653              {
1654              pcre_callout = NULL;
1655              p++;
1656              }
1657            else if (*p == '!')
1658              {
1659              callout_fail_id = 0;
1660              p++;
1661              while(isdigit(*p))
1662                callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1663              callout_fail_count = 0;
1664              if (*p == '!')
1665                {
1666                p++;
1667                while(isdigit(*p))
1668                  callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1669                }
1670              }
1671            else if (*p == '*')
1672              {
1673              int sign = 1;
1674              callout_data = 0;
1675              if (*(++p) == '-') { sign = -1; p++; }
1676              while(isdigit(*p))
1677                callout_data = callout_data * 10 + *p++ - '0';
1678              callout_data *= sign;
1679              callout_data_set = 1;
1680              }
1681            continue;
1682    
1683    #if !defined NODFA
1684            case 'D':
1685    #if !defined NOPOSIX
1686            if (posix || do_posix)
1687              printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1688            else
1689    #endif
1690              use_dfa = 1;
1691            continue;
1692    
1693            case 'F':
1694            options |= PCRE_DFA_SHORTEST;
1695            continue;
1696    #endif
1697    
1698            case 'G':
1699            if (isdigit(*p))
1700              {
1701              while(isdigit(*p)) n = n * 10 + *p++ - '0';
1702              getstrings |= 1 << n;
1703              }
1704            else if (isalnum(*p))
1705              {
1706              uschar *npp = getnamesptr;
1707              while (isalnum(*p)) *npp++ = *p++;
1708              *npp++ = 0;
1709              *npp = 0;
1710              n = pcre_get_stringnumber(re, (char *)getnamesptr);
1711              if (n < 0)
1712                fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1713              getnamesptr = npp;
1714              }
1715          continue;          continue;
1716    
1717          case 'I':          case 'L':
1718          options |= PCRE_CASELESS;          getlist = 1;
1719          continue;          continue;
1720    
1721          case 'M':          case 'M':
1722          options |= PCRE_MULTILINE;          find_match_limit = 1;
1723          continue;          continue;
1724    
1725          case 'S':          case 'N':
1726          options |= PCRE_DOTALL;          options |= PCRE_NOTEMPTY;
1727          continue;          continue;
1728    
1729          case 'O':          case 'O':
1730          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
1731          if (n <= sizeof(offsets)/sizeof(int)) size_offsets = n;          if (n > size_offsets_max)
1732              {
1733              size_offsets_max = n;
1734              free(offsets);
1735              use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1736              if (offsets == NULL)
1737                {
1738                printf("** Failed to get %d bytes of memory for offsets vector\n",
1739                  size_offsets_max * sizeof(int));
1740                yield = 1;
1741                goto EXIT;
1742                }
1743              }
1744            use_size_offsets = n;
1745            if (n == 0) use_offsets = NULL;   /* Ensures it can't write to it */
1746            continue;
1747    
1748            case 'P':
1749            options |= PCRE_PARTIAL;
1750            continue;
1751    
1752            case 'Q':
1753            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1754            if (extra == NULL)
1755              {
1756              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1757              extra->flags = 0;
1758              }
1759            extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1760            extra->match_limit_recursion = n;
1761            continue;
1762    
1763            case 'q':
1764            while(isdigit(*p)) n = n * 10 + *p++ - '0';
1765            if (extra == NULL)
1766              {
1767              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1768              extra->flags = 0;
1769              }
1770            extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1771            extra->match_limit = n;
1772            continue;
1773    
1774    #if !defined NODFA
1775            case 'R':
1776            options |= PCRE_DFA_RESTART;
1777            continue;
1778    #endif
1779    
1780            case 'S':
1781            show_malloc = 1;
1782          continue;          continue;
1783    
1784          case 'Z':          case 'Z':
1785          options |= PCRE_NOTEOL;          options |= PCRE_NOTEOL;
1786          continue;          continue;
1787    
1788            case '?':
1789            options |= PCRE_NO_UTF8_CHECK;
1790            continue;
1791    
1792            case '<':
1793              {
1794              int x = check_newline(p, outfile);
1795              if (x == 0) goto NEXT_DATA;
1796              options |= x;
1797              while (*p++ != '>');
1798              }
1799            continue;
1800          }          }
1801        *pp++ = c;        *q++ = c;
1802          }
1803        *q = 0;
1804        len = q - dbuffer;
1805    
1806        if ((all_use_dfa || use_dfa) && find_match_limit)
1807          {
1808          printf("**Match limit not relevant for DFA matching: ignored\n");
1809          find_match_limit = 0;
1810        }        }
     *pp = 0;  
     len = pp - dbuffer;  
1811    
1812      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1813      support timing. */      support timing or playing with the match limit or callout data. */
1814    
1815    #if !defined NOPOSIX
1816      if (posix || do_posix)      if (posix || do_posix)
1817        {        {
1818        int rc;        int rc;
1819        int eflags = 0;        int eflags = 0;
1820        regmatch_t pmatch[30];        regmatch_t *pmatch = NULL;
1821          if (use_size_offsets > 0)
1822            pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1823        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1824        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1825    
1826        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
         pmatch, eflags);  
1827    
1828        if (rc != 0)        if (rc != 0)
1829          {          {
1830          (void)regerror(rc, &preg, (char *)buffer, sizeof(buffer));          (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1831          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);          fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1832          }          }
1833          else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1834                  != 0)
1835            {
1836            fprintf(outfile, "Matched with REG_NOSUB\n");
1837            }
1838        else        else
1839          {          {
1840          int i;          size_t i;
1841          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < (size_t)use_size_offsets; i++)
1842            {            {
1843            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1844              {              {
1845              fprintf(outfile, "%2d: ", i);              fprintf(outfile, "%2d: ", (int)i);
1846              pchars(dbuffer + pmatch[i].rm_so,              (void)pchars(dbuffer + pmatch[i].rm_so,
1847                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1848              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1849                if (i == 0 && do_showrest)
1850                  {
1851                  fprintf(outfile, " 0+ ");
1852                  (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
1853                    outfile);
1854                  fprintf(outfile, "\n");
1855                  }
1856              }              }
1857            }            }
1858          }          }
1859          free(pmatch);
1860        }        }
1861    
1862      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1863    
1864      else      else
1865    #endif  /* !defined NOPOSIX */
1866    
1867        for (;; gmatched++)    /* Loop for /g or /G */
1868        {        {
1869        if (timeit)        if (timeit)
1870          {          {
1871          register int i;          register int i;
1872          clock_t time_taken;          clock_t time_taken;
1873          clock_t start_time = clock();          clock_t start_time = clock();
1874          for (i = 0; i < 4000; i++)  
1875            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,  #if !defined NODFA
1876              size_offsets);          if (all_use_dfa || use_dfa)
1877              {
1878              int workspace[1000];
1879              for (i = 0; i < LOOPREPEAT; i++)
1880                count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1881                  options | g_notempty, use_offsets, use_size_offsets, workspace,
1882                  sizeof(workspace)/sizeof(int));
1883              }
1884            else
1885    #endif
1886    
1887            for (i = 0; i < LOOPREPEAT; i++)
1888              count = pcre_exec(re, extra, (char *)bptr, len,
1889                start_offset, options | g_notempty, use_offsets, use_size_offsets);
1890    
1891          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1892          fprintf(outfile, "Execute time %.2f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1893            ((double)time_taken)/(4 * CLOCKS_PER_SEC));            (((double)time_taken * 1000.0) / (double)LOOPREPEAT) /
1894                (double)CLOCKS_PER_SEC);
1895            }
1896    
1897          /* If find_match_limit is set, we want to do repeated matches with
1898          varying limits in order to find the minimum value for the match limit and
1899          for the recursion limit. */
1900    
1901          if (find_match_limit)
1902            {
1903            if (extra == NULL)
1904              {
1905              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1906              extra->flags = 0;
1907              }
1908    
1909            (void)check_match_limit(re, extra, bptr, len, start_offset,
1910              options|g_notempty, use_offsets, use_size_offsets,
1911              PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
1912              PCRE_ERROR_MATCHLIMIT, "match()");
1913    
1914            count = check_match_limit(re, extra, bptr, len, start_offset,
1915              options|g_notempty, use_offsets, use_size_offsets,
1916              PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
1917              PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
1918            }
1919    
1920          /* If callout_data is set, use the interface with additional data */
1921    
1922          else if (callout_data_set)
1923            {
1924            if (extra == NULL)
1925              {
1926              extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1927              extra->flags = 0;
1928              }
1929            extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
1930            extra->callout_data = &callout_data;
1931            count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
1932              options | g_notempty, use_offsets, use_size_offsets);
1933            extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
1934          }          }
1935    
1936        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        /* The normal case is just to do the match once, with the default
1937          size_offsets);        value of match_limit. */
1938    
1939        if (count == 0)  #if !defined NODFA
1940          else if (all_use_dfa || use_dfa)
1941          {          {
1942          fprintf(outfile, "Matched, but too many substrings\n");          int workspace[1000];
1943          count = size_offsets/2;          count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
1944              options | g_notempty, use_offsets, use_size_offsets, workspace,
1945              sizeof(workspace)/sizeof(int));
1946            if (count == 0)
1947              {
1948              fprintf(outfile, "Matched, but too many subsidiary matches\n");
1949              count = use_size_offsets/2;
1950              }
1951            }
1952    #endif
1953    
1954          else
1955            {
1956            count = pcre_exec(re, extra, (char *)bptr, len,
1957              start_offset, options | g_notempty, use_offsets, use_size_offsets);
1958            if (count == 0)
1959              {
1960              fprintf(outfile, "Matched, but too many substrings\n");
1961              count = use_size_offsets/3;
1962              }
1963          }          }
1964    
1965          /* Matched */
1966    
1967        if (count >= 0)        if (count >= 0)
1968          {          {
1969          int i;          int i;
1970          count *= 2;          for (i = 0; i < count * 2; i += 2)
         for (i = 0; i < count; i += 2)  
1971            {            {
1972            if (offsets[i] < 0)            if (use_offsets[i] < 0)
1973              fprintf(outfile, "%2d: <unset>\n", i/2);              fprintf(outfile, "%2d: <unset>\n", i/2);
1974            else            else
1975              {              {
1976              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1977              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              (void)pchars(bptr + use_offsets[i],
1978                  use_offsets[i+1] - use_offsets[i], outfile);
1979              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1980                if (i == 0)
1981                  {
1982                  if (do_showrest)
1983                    {
1984                    fprintf(outfile, " 0+ ");
1985                    (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
1986                      outfile);
1987                    fprintf(outfile, "\n");
1988                    }
1989                  }
1990                }
1991              }
1992    
1993            for (i = 0; i < 32; i++)
1994              {
1995              if ((copystrings & (1 << i)) != 0)
1996                {
1997                char copybuffer[256];
1998                int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
1999                  i, copybuffer, sizeof(copybuffer));
2000                if (rc < 0)
2001                  fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2002                else
2003                  fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2004                }
2005              }
2006    
2007            for (copynamesptr = copynames;
2008                 *copynamesptr != 0;
2009                 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2010              {
2011              char copybuffer[256];
2012              int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2013                count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2014              if (rc < 0)
2015                fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2016              else
2017                fprintf(outfile, "  C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2018              }
2019    
2020            for (i = 0; i < 32; i++)
2021              {
2022              if ((getstrings & (1 << i)) != 0)
2023                {
2024                const char *substring;
2025                int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2026                  i, &substring);
2027                if (rc < 0)
2028                  fprintf(outfile, "get substring %d failed %d\n", i, rc);
2029                else
2030                  {
2031                  fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2032                  pcre_free_substring(substring);
2033                  }
2034                }
2035              }
2036    
2037            for (getnamesptr = getnames;
2038                 *getnamesptr != 0;
2039                 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2040              {
2041              const char *substring;
2042              int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2043                count, (char *)getnamesptr, &substring);
2044              if (rc < 0)
2045                fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2046              else
2047                {
2048                fprintf(outfile, "  G %s (%d) %s\n", substring, rc, getnamesptr);
2049                pcre_free_substring(substring);
2050                }
2051              }
2052    
2053            if (getlist)
2054              {
2055              const char **stringlist;
2056              int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2057                &stringlist);
2058              if (rc < 0)
2059                fprintf(outfile, "get substring list failed %d\n", rc);
2060              else
2061                {
2062                for (i = 0; i < count; i++)
2063                  fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2064                if (stringlist[i] != NULL)
2065                  fprintf(outfile, "string list not terminated by NULL\n");
2066                /* free((void *)stringlist); */
2067                pcre_free_substring_list(stringlist);
2068              }              }
2069            }            }
2070          }          }
2071    
2072          /* There was a partial match */
2073    
2074          else if (count == PCRE_ERROR_PARTIAL)
2075            {
2076            fprintf(outfile, "Partial match");
2077    #if !defined NODFA
2078            if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2079              fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2080                bptr + use_offsets[0]);
2081    #endif
2082            fprintf(outfile, "\n");
2083            break;  /* Out of the /g loop */
2084            }
2085    
2086          /* Failed to match. If this is a /g or /G loop and we previously set
2087          g_notempty after a null match, this is not necessarily the end.
2088          We want to advance the start offset, and continue. In the case of UTF-8
2089          matching, the advance must be one character, not one byte. Fudge the
2090          offset values to achieve this. We won't be at the end of the string -
2091          that was checked before setting g_notempty. */
2092    
2093        else        else
2094          {          {
2095          if (count == -1) fprintf(outfile, "No match\n");          if (g_notempty != 0)
2096              {
2097              int onechar = 1;
2098              use_offsets[0] = start_offset;
2099              if (use_utf8)
2100                {
2101                while (start_offset + onechar < len)
2102                  {
2103                  int tb = bptr[start_offset+onechar];
2104                  if (tb <= 127) break;
2105                  tb &= 0xc0;
2106                  if (tb != 0 && tb != 0xc0) onechar++;
2107                  }
2108                }
2109              use_offsets[1] = start_offset + onechar;
2110              }
2111            else
2112              {
2113              if (count == PCRE_ERROR_NOMATCH)
2114                {
2115                if (gmatched == 0) fprintf(outfile, "No match\n");
2116                }
2117            else fprintf(outfile, "Error %d\n", count);            else fprintf(outfile, "Error %d\n", count);
2118              break;  /* Out of the /g loop */
2119              }
2120          }          }
       }  
     }  
2121    
2122          /* If not /g or /G we are done */
2123    
2124          if (!do_g && !do_G) break;
2125    
2126          /* If we have matched an empty string, first check to see if we are at
2127          the end of the subject. If so, the /g loop is over. Otherwise, mimic
2128          what Perl's /g option does. This turns out to be rather cunning. First
2129          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2130          same point. If this fails (picked up above) we advance to the next
2131          character. */
2132    
2133          g_notempty = 0;
2134          if (use_offsets[0] == use_offsets[1])
2135            {
2136            if (use_offsets[0] == len) break;
2137            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2138            }
2139    
2140          /* For /g, update the start offset, leaving the rest alone */
2141    
2142          if (do_g) start_offset = use_offsets[1];
2143    
2144          /* For /G, update the pointer and length */
2145    
2146          else
2147            {
2148            bptr += use_offsets[1];
2149            len -= use_offsets[1];
2150            }
2151          }  /* End of loop for /g and /G */
2152    
2153        NEXT_DATA: continue;
2154        }    /* End of loop for data lines */
2155    
2156      CONTINUE:
2157    
2158    #if !defined NOPOSIX
2159    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
2160    if (re != NULL) free(re);  #endif
2161    if (extra != NULL) free(extra);  
2162      if (re != NULL) new_free(re);
2163      if (extra != NULL) new_free(extra);
2164      if (tables != NULL)
2165        {
2166        new_free((void *)tables);
2167        setlocale(LC_CTYPE, "C");
2168        }
2169    }    }
2170    
2171  END_OFF:  if (infile == stdin) fprintf(outfile, "\n");
2172  fprintf(outfile, "\n");  
2173  return 0;  EXIT:
2174    
2175    if (infile != NULL && infile != stdin) fclose(infile);
2176    if (outfile != NULL && outfile != stdout) fclose(outfile);
2177    
2178    free(buffer);
2179    free(dbuffer);
2180    free(pbuffer);
2181    free(offsets);
2182    
2183    return yield;
2184  }  }
2185    
2186  /* End */  /* End of pcretest.c */

Legend:
Removed from v.3  
changed lines
  Added in v.91

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12