/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC | revision 35 by nigel, Sat Feb 24 21:39:05 2007 UTC
# Line 9  the file Tech.Notes for some information Line 9  the file Tech.Notes for some information
9    
10  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
11    
12             Copyright (c) 1997 University of Cambridge             Copyright (c) 1997-1999 University of Cambridge
13    
14  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
15  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 25  restrictions: Line 25  restrictions:
25    
26  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
27     misrepresented as being the original software.     misrepresented as being the original software.
28    
29    4. If PCRE is embedded in any software that is released under the GNU
30       General Purpose Licence (GPL), then the terms of that licence shall
31       supersede any condition above with which it is incompatible.
32  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
33  */  */
34    
# Line 33  restrictions: Line 37  restrictions:
37    
38  /* #define DEBUG */  /* #define DEBUG */
39    
40    /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
41    inline, and there are *still* stupid compilers about that don't like indented
42    pre-processor statements. I suppose it's only been 10 years... */
43    
44    #ifdef DEBUG
45    #define DPRINTF(p) printf p
46    #else
47    #define DPRINTF(p) /*nothing*/
48    #endif
49    
50  /* Include the internals header, which itself includes Standard C headers plus  /* Include the internals header, which itself includes Standard C headers plus
51  the external pcre header. */  the external pcre header. */
# Line 40  the external pcre header. */ Line 53  the external pcre header. */
53  #include "internal.h"  #include "internal.h"
54    
55    
56    /* Allow compilation as C++ source code, should anybody want to do that. */
57    
58    #ifdef __cplusplus
59    #define class pcre_class
60    #endif
61    
62    
63    /* Number of items on the nested bracket stacks at compile time. This should
64    not be set greater than 200. */
65    
66    #define BRASTACK_SIZE 200
67    
68    
69  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
70    
71  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
72  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static const char rep_max[] = { 0, 0, 0, 0, 1, 1 };
73    
74  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
75    
76  #ifdef DEBUG  #ifdef DEBUG
77  static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d",  static const char *OP_names[] = {
78    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
79    "not",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
80      "Opt", "^", "$", "Any", "chars", "not",
81    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
82    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
83    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
84    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
85    "class", "Ref",    "class", "Ref",
86    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
87      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
88    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
89  };  };
90  #endif  #endif
# Line 66  are simple data values; negative values Line 94  are simple data values; negative values
94  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
95  is invalid. */  is invalid. */
96    
97  static short int escapes[] = {  static const short int escapes[] = {
98      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */      0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
99      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */      0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
100    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */    '@', -ESC_A, -ESC_B,      0, -ESC_D,      0,      0,      0,   /* @ - G */
# Line 76  static short int escapes[] = { Line 104  static short int escapes[] = {
104    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
105      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
106      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
107      0,      0,      0                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
108  };  };
109    
110  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
111    
112  static BOOL compile_regex(int, int *,uschar **,uschar **,char **);  static BOOL
113      compile_regex(int, int, int *, uschar **, const uschar **, const char **,
114  /* Structure for passing "static" information around between the functions      BOOL, int, compile_data *);
 doing the matching, so that they are thread-safe. */  
   
 typedef struct match_data {  
   int    errorcode;             /* As it says */  
   int   *offset_vector;         /* Offset vector */  
   int    offset_end;            /* One past the end */  
   BOOL   offset_overflow;       /* Set if too many extractions */  
   BOOL   caseless;              /* Case-independent flag */  
   BOOL   runtime_caseless;      /* Caseless forced at run time */  
   BOOL   multiline;             /* Multiline flag */  
   BOOL   notbol;                /* NOTBOL flag */  
   BOOL   noteol;                /* NOTEOL flag */  
   BOOL   dotall;                /* Dot matches any char */  
   BOOL   endonly;               /* Dollar not before final \n */  
   uschar *start_subject;        /* Start of the subject string */  
   uschar *end_subject;          /* End of the subject string */  
   jmp_buf fail_env;             /* Environment for longjump() break out */  
   uschar *end_match_ptr;        /* Subject position at end match */  
   int     end_offset_top;       /* Highwater mark at end of match */  
 } match_data;  
115    
116    
117    
# Line 123  void (*pcre_free)(void *) = free; Line 131  void (*pcre_free)(void *) = free;
131    
132    
133  /*************************************************  /*************************************************
134    *             Default character tables           *
135    *************************************************/
136    
137    /* A default set of character tables is included in the PCRE binary. Its source
138    is built by the maketables auxiliary program, which uses the default C ctypes
139    functions, and put in the file chartables.c. These tables are used by PCRE
140    whenever the caller of pcre_compile() does not provide an alternate set of
141    tables. */
142    
143    #include "chartables.c"
144    
145    
146    
147    /*************************************************
148  *          Return version string                 *  *          Return version string                 *
149  *************************************************/  *************************************************/
150    
151  char *  const char *
152  pcre_version(void)  pcre_version(void)
153  {  {
154  return PCRE_VERSION;  return PCRE_VERSION;
# Line 156  Returns: number of identifying ex Line 178  Returns: number of identifying ex
178  int  int
179  pcre_info(const pcre *external_re, int *optptr, int *first_char)  pcre_info(const pcre *external_re, int *optptr, int *first_char)
180  {  {
181  real_pcre *re = (real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
182  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
183  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
184  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);
# Line 186  Arguments: Line 208  Arguments:
208  Returns:     nothing  Returns:     nothing
209  */  */
210    
211  static pchars(uschar *p, int length, BOOL is_subject, match_data *md)  static void
212    pchars(const uschar *p, int length, BOOL is_subject, match_data *md)
213  {  {
214  int c;  int c;
215  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;  if (is_subject && length > md->end_subject - p) length = md->end_subject - p;
# Line 199  while (length-- > 0) Line 222  while (length-- > 0)
222    
223    
224  /*************************************************  /*************************************************
 *         Check subpattern for empty operand     *  
 *************************************************/  
   
 /* This function checks a bracketed subpattern to see if any of the paths  
 through it could match an empty string. This is used to diagnose an error if  
 such a subpattern is followed by a quantifier with an unlimited upper bound.  
   
 Argument:  
   code      points to the opening bracket  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 could_be_empty(uschar *code)  
 {  
 do {  
   uschar *cc = code + 3;  
   
   /* Scan along the opcodes for this branch; as soon as we find something  
   that matches a non-empty string, break out and advance to test the next  
   branch. If we get to the end of the branch, return TRUE for the whole  
   sub-expression. */  
   
   for (;;)  
     {  
     /* Test an embedded subpattern; if it could not be empty, break the  
     loop. Otherwise carry on in the branch. */  
   
     if ((int)(*cc) >= OP_BRA)  
       {  
       if (!could_be_empty(cc)) break;  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       }  
   
     else switch (*cc)  
       {  
       /* Reached end of a branch: the subpattern may match the empty string */  
   
       case OP_ALT:  
       case OP_KET:  
       case OP_KETRMAX:  
       case OP_KETRMIN:  
       return TRUE;  
   
       /* Skip over assertive subpatterns */  
   
       case OP_ASSERT:  
       case OP_ASSERT_NOT:  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  
       cc += 3;  
       break;  
   
       /* Skip over things that don't match chars */  
   
       case OP_SOD:  
       case OP_EOD:  
       case OP_CIRC:  
       case OP_DOLL:  
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
       case OP_NOT_WORD_BOUNDARY:  
       case OP_WORD_BOUNDARY:  
       cc++;  
       break;  
   
       /* Skip over simple repeats with zero lower bound */  
   
       case OP_STAR:  
       case OP_MINSTAR:  
       case OP_QUERY:  
       case OP_MINQUERY:  
       case OP_TYPESTAR:  
       case OP_TYPEMINSTAR:  
       case OP_TYPEQUERY:  
       case OP_TYPEMINQUERY:  
       cc += 2;  
       break;  
   
       /* Skip over UPTOs (lower bound is zero) */  
   
       case OP_UPTO:  
       case OP_MINUPTO:  
       case OP_TYPEUPTO:  
       case OP_TYPEMINUPTO:  
       cc += 4;  
       break;  
   
       /* Check a class or a back reference for a zero minimum */  
   
       case OP_CLASS:  
       case OP_REF:  
       cc += (*cc == OP_REF)? 2 : 4 + 2 * cc[2] + cc[3];  
   
       switch (*cc)  
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         cc++;  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;  
         cc += 3;  
         break;  
   
         default:  
         goto NEXT_BRANCH;  
         }  
       break;  
   
       /* Anything else matches at least one character */  
   
       default:  
       goto NEXT_BRANCH;  
       }  
     }  
   
   NEXT_BRANCH:  
   code += (code[1] << 8) + code[2];  
   }  
 while (*code == OP_ALT);  
   
 /* No branches match the empty string */  
   
 return FALSE;  
 }  
   
   
   
 /*************************************************  
225  *            Handle escapes                      *  *            Handle escapes                      *
226  *************************************************/  *************************************************/
227    
# Line 349  Arguments: Line 237  Arguments:
237    bracount   number of previous extracting brackets    bracount   number of previous extracting brackets
238    options    the options bits    options    the options bits
239    isclass    TRUE if inside a character class    isclass    TRUE if inside a character class
240      cd         pointer to char tables block
241    
242  Returns:     zero or positive => a data character  Returns:     zero or positive => a data character
243               negative => a special escape sequence               negative => a special escape sequence
# Line 356  Returns: zero or positive => a data Line 245  Returns: zero or positive => a data
245  */  */
246    
247  static int  static int
248  check_escape(uschar **ptrptr, char **errorptr, int bracount, int options,  check_escape(const uschar **ptrptr, const char **errorptr, int bracount,
249    BOOL isclass)    int options, BOOL isclass, compile_data *cd)
250  {  {
251  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
252  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */  int c = *(++ptr) & 255;   /* Ensure > 0 on signed-char systems */
253  int i;  int i;
254    
# Line 378  else if ((i = escapes[c - '0']) != 0) c Line 267  else if ((i = escapes[c - '0']) != 0) c
267    
268  else  else
269    {    {
270    uschar *oldptr;    const uschar *oldptr;
271    switch (c)    switch (c)
272      {      {
273      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 400  else Line 289  else
289        {        {
290        oldptr = ptr;        oldptr = ptr;
291        c -= '0';        c -= '0';
292        while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0)        while ((cd->ctypes[ptr[1]] & ctype_digit) != 0)
293          c = c * 10 + *(++ptr) - '0';          c = c * 10 + *(++ptr) - '0';
294        if (c < 10 || c <= bracount)        if (c < 10 || c <= bracount)
295          {          {
# Line 426  else Line 315  else
315    
316      case '0':      case '0':
317      c -= '0';      c -= '0';
318      while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 &&      while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 &&
319        ptr[1] != '8' && ptr[1] != '9')        ptr[1] != '8' && ptr[1] != '9')
320          c = c * 8 + *(++ptr) - '0';          c = c * 8 + *(++ptr) - '0';
321      break;      break;
# Line 435  else Line 324  else
324    
325      case 'x':      case 'x':
326      c = 0;      c = 0;
327      while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0)
328        {        {
329        ptr++;        ptr++;
330        c = c * 16 + pcre_lcc[*ptr] -        c = c * 16 + cd->lcc[*ptr] -
331          (((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');          (((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W');
332        }        }
333      break;      break;
334    
# Line 453  else Line 342  else
342    
343      /* A letter is upper-cased; then the 0x40 bit is flipped */      /* A letter is upper-cased; then the 0x40 bit is flipped */
344    
345      if (c >= 'a' && c <= 'z') c = pcre_fcc[c];      if (c >= 'a' && c <= 'z') c = cd->fcc[c];
346      c ^= 0x40;      c ^= 0x40;
347      break;      break;
348    
349      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any      /* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any
350      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,      other alphameric following \ is an error if PCRE_EXTRA was set; otherwise,
351      for Perl compatibility, it is a literal. */      for Perl compatibility, it is a literal. This code looks a bit odd, but
352        there used to be some cases other than the default, and there may be again
353        in future, so I haven't "optimized" it. */
354    
355      default:      default:
356      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
357        {        {
       case 'X':  
       c = -ESC_X;      /* This could be a lookup if it ever got into Perl */  
       break;  
   
358        default:        default:
359        *errorptr = ERR3;        *errorptr = ERR3;
360        break;        break;
# Line 493  where the ddds are digits. Line 380  where the ddds are digits.
380    
381  Arguments:  Arguments:
382    p         pointer to the first char after '{'    p         pointer to the first char after '{'
383      cd        pointer to char tables block
384    
385  Returns:    TRUE or FALSE  Returns:    TRUE or FALSE
386  */  */
387    
388  static BOOL  static BOOL
389  is_counted_repeat(uschar *p)  is_counted_repeat(const uschar *p, compile_data *cd)
390  {  {
391  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
392  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
393  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
394    
395  if (*p++ != ',') return FALSE;  if (*p++ != ',') return FALSE;
396  if (*p == '}') return TRUE;  if (*p == '}') return TRUE;
397    
398  if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE;  if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE;
399  while ((pcre_ctypes[*p] & ctype_digit) != 0) p++;  while ((cd->ctypes[*p] & ctype_digit) != 0) p++;
400  return (*p == '}');  return (*p == '}');
401  }  }
402    
# Line 528  Arguments: Line 416  Arguments:
416    maxp       pointer to int for max    maxp       pointer to int for max
417               returned as -1 if no max               returned as -1 if no max
418    errorptr   points to pointer to error message    errorptr   points to pointer to error message
419      cd         pointer to character tables block
420    
421  Returns:     pointer to '}' on success;  Returns:     pointer to '}' on success;
422               current ptr on error, with errorptr set               current ptr on error, with errorptr set
423  */  */
424    
425  static uschar *  static const uschar *
426  read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr)  read_repeat_counts(const uschar *p, int *minp, int *maxp,
427      const char **errorptr, compile_data *cd)
428  {  {
429  int min = 0;  int min = 0;
430  int max = -1;  int max = -1;
431    
432  while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';  while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0';
433    
434  if (*p == '}') max = min; else  if (*p == '}') max = min; else
435    {    {
436    if (*(++p) != '}')    if (*(++p) != '}')
437      {      {
438      max = 0;      max = 0;
439      while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';      while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0';
440      if (max < min)      if (max < min)
441        {        {
442        *errorptr = ERR4;        *errorptr = ERR4;
# Line 571  return p; Line 461  return p;
461    
462    
463  /*************************************************  /*************************************************
464    *        Find the fixed length of a pattern      *
465    *************************************************/
466    
467    /* Scan a pattern and compute the fixed length of subject that will match it,
468    if the length is fixed. This is needed for dealing with backward assertions.
469    
470    Arguments:
471      code     points to the start of the pattern (the bracket)
472    
473    Returns:   the fixed length, or -1 if there is no fixed length
474    */
475    
476    static int
477    find_fixedlength(uschar *code)
478    {
479    int length = -1;                 /* -1 until the first branch has been measured */
480    
481    register int branchlength = 0;   /* running length of the branch being scanned */
482    register uschar *cc = code + 3;  /* start 3 bytes past the bracket item (opcode + the two bytes read as a link below) */
483    
484    /* Scan along the opcodes for this branch. If we get to the end of the
485    branch, check the length against that of the other branches. */
486    
487    for (;;)
488      {
489      int d;
490      register int op = *cc;
491      if (op >= OP_BRA) op = OP_BRA;   /* fold every opcode value >= OP_BRA onto the OP_BRA case */
492    
493      switch (op)
494        {
495        case OP_BRA:
496        case OP_ONCE:
497        case OP_COND:
498        d = find_fixedlength(cc);      /* recurse to measure the nested group */
499        if (d < 0) return -1;          /* nested group is not fixed length, so neither is this one */
500        branchlength += d;
501        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);   /* follow the 16-bit (high byte first) links past every alternative */
502        cc += 3;                       /* step over the 3-byte item that ends the group (presumably the ket) */
503        break;
504    
505        /* Reached end of a branch; if it's a ket it is the end of a nested
506        call. If it's ALT it is an alternation in a nested call. If it is
507        END it's the end of the outer call. All can be handled by the same code. */
508    
509        case OP_ALT:
510        case OP_KET:
511        case OP_KETRMAX:
512        case OP_KETRMIN:
513        case OP_END:
514        if (length < 0) length = branchlength;            /* first branch sets the reference length */
515          else if (length != branchlength) return -1;     /* branches disagree: no fixed length */
516        if (*cc != OP_ALT) return length;                 /* any terminator other than ALT ends the scan */
517        cc += 3;                                          /* step over the 3-byte ALT item into the next branch */
518        branchlength = 0;
519        break;
520    
521        /* Skip over assertive subpatterns */
522    
523        case OP_ASSERT:
524        case OP_ASSERT_NOT:
525        case OP_ASSERTBACK:
526        case OP_ASSERTBACK_NOT:
527        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);   /* branchlength is left unchanged for assertions */
528        cc += 3;
529        break;
530    
531        /* Skip over things that don't match chars */
532    
533        case OP_REVERSE:
534        cc++;                          /* Fall through: OP_REVERSE advances 3 bytes in total */
535    
536        case OP_CREF:
537        case OP_OPT:
538        cc++;                          /* OP_CREF and OP_OPT advance 2 bytes in total */
539        /* Fall through */
540    
541        case OP_SOD:
542        case OP_EOD:
543        case OP_EODN:
544        case OP_CIRC:
545        case OP_DOLL:
546        case OP_NOT_WORD_BOUNDARY:
547        case OP_WORD_BOUNDARY:
548        cc++;
549        break;
550    
551        /* Handle char strings */
552    
553        case OP_CHARS:
554        branchlength += *(++cc);       /* the byte after the opcode is added as the character count */
555        cc += *cc + 1;                 /* skip the count byte and that many following bytes */
556        break;
557    
558        /* Handle exact repetitions */
559    
560        case OP_EXACT:
561        case OP_TYPEEXACT:
562        branchlength += (cc[1] << 8) + cc[2];   /* 16-bit repeat count, high byte first */
563        cc += 4;                                /* the whole item is stepped over as 4 bytes */
564        break;
565    
566        /* Handle single-char matchers */
567    
568        case OP_NOT_DIGIT:
569        case OP_DIGIT:
570        case OP_NOT_WHITESPACE:
571        case OP_WHITESPACE:
572        case OP_NOT_WORDCHAR:
573        case OP_WORDCHAR:
574        case OP_ANY:
575        branchlength++;
576        cc++;
577        break;
578    
579    
580        /* Check a class for variable quantification */
581    
582        case OP_CLASS:
583        cc += (*cc == OP_REF)? 2 : 33;   /* NOTE(review): *cc is OP_CLASS on this path, so the OP_REF arm is never taken and this always adds 33 */
584    
585        switch (*cc)                     /* inspect the item that follows the class */
586          {
587          case OP_CRSTAR:
588          case OP_CRMINSTAR:
589          case OP_CRQUERY:
590          case OP_CRMINQUERY:
591          return -1;
592    
593          case OP_CRRANGE:
594          case OP_CRMINRANGE:
595          if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;   /* the two 16-bit counts differ: variable length */
596          branchlength += (cc[1] << 8) + cc[2];
597          cc += 5;                       /* step over the 5-byte range item */
598          break;
599    
600          default:
601          branchlength++;                /* none of the repeats listed above: count one character; cc is left for the outer switch */
602          }
603        break;
604    
605        /* Anything else is variable length */
606    
607        default:
608        return -1;
609        }
610      }
611    /* Control never gets here */
612    }
613    
614    
615    
616    
617    /*************************************************
618  *           Compile one branch                   *  *           Compile one branch                   *
619  *************************************************/  *************************************************/
620    
621  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
622    
623  Arguments:  Arguments:
624    options    the option bits    options      the option bits
625    bracket    points to number of brackets used    brackets     points to number of brackets used
626    code       points to the pointer to the current code point    code         points to the pointer to the current code point
627    ptrptr     points to the current pattern pointer    ptrptr       points to the current pattern pointer
628    errorptr   points to pointer to error message    errorptr     points to pointer to error message
629      optchanged   set to the value of the last OP_OPT item compiled
630      cd           contains pointers to tables
631    
632  Returns:     TRUE on success  Returns:       TRUE on success
633               FALSE, with *errorptr set on error                 FALSE, with *errorptr set on error
634  */  */
635    
636  static BOOL  static BOOL
637  compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_branch(int options, int *brackets, uschar **codeptr,
638    char **errorptr)    const uschar **ptrptr, const char **errorptr, int *optchanged,
639      compile_data *cd)
640  {  {
641  int repeat_type, op_type;  int repeat_type, op_type;
642  int repeat_min, repeat_max;  int repeat_min, repeat_max;
643  int bravalue, length;  int bravalue, length;
644    int greedy_default, greedy_non_default;
645  register int c;  register int c;
646  register uschar *code = *codeptr;  register uschar *code = *codeptr;
647  uschar *ptr = *ptrptr;  uschar *tempcode;
648    const uschar *ptr = *ptrptr;
649    const uschar *tempptr;
650  uschar *previous = NULL;  uschar *previous = NULL;
 uschar *oldptr;  
651  uschar class[32];  uschar class[32];
652    
653    /* Set up the default and non-default settings for greediness */
654    
655    greedy_default = ((options & PCRE_UNGREEDY) != 0);
656    greedy_non_default = greedy_default ^ 1;
657    
658  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
659    
660  for (;; ptr++)  for (;; ptr++)
661    {    {
662    BOOL negate_class;    BOOL negate_class;
663    int  class_charcount;    int class_charcount;
664    int  class_lastchar;    int class_lastchar;
665      int newoptions;
666      int condref;
667    
668    c = *ptr;    c = *ptr;
669    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
670      {      {
671      if ((pcre_ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
672      if (c == '#')      if (c == '#')
673        {        {
674        while ((c = *(++ptr)) != 0 && c != '\n');        while ((c = *(++ptr)) != 0 && c != '\n');
# Line 657  for (;; ptr++) Line 713  for (;; ptr++)
713      previous = code;      previous = code;
714      *code++ = OP_CLASS;      *code++ = OP_CLASS;
715    
716      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag and skip it. */
717    
718      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
719        {        {
# Line 693  for (;; ptr++) Line 749  for (;; ptr++)
749        /* Backslash may introduce a single character, or it may introduce one        /* Backslash may introduce a single character, or it may introduce one
750        of the specials, which just set a flag. Escaped items are checked for        of the specials, which just set a flag. Escaped items are checked for
751        validity in the pre-compiling pass. The sequence \b is a special case.        validity in the pre-compiling pass. The sequence \b is a special case.
752        Inside a class (and only there) it is treated as backslash. Elsewhere        Inside a class (and only there) it is treated as backspace. Elsewhere
753        it marks a word boundary. Other escapes have preset maps ready to        it marks a word boundary. Other escapes have preset maps ready to
754        or into the one we are building. We assume they have more than one        or into the one we are building. We assume they have more than one
755        character in them, so set class_count bigger than one. */        character in them, so set class_count bigger than one. */
756    
757        if (c == '\\')        if (c == '\\')
758          {          {
759          c = check_escape(&ptr, errorptr, *brackets, options, TRUE);          c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
760          if (-c == ESC_b) c = '\b';          if (-c == ESC_b) c = '\b';
761          else if (c < 0)          else if (c < 0)
762            {            {
763              register const uschar *cbits = cd->cbits;
764            class_charcount = 10;            class_charcount = 10;
765            switch (-c)            switch (-c)
766              {              {
767              case ESC_d:              case ESC_d:
768              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit];
769              continue;              continue;
770    
771              case ESC_D:              case ESC_D:
772              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit];
773              continue;              continue;
774    
775              case ESC_w:              case ESC_w:
776              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
777                class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]);
778              continue;              continue;
779    
780              case ESC_W:              case ESC_W:
781              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
782                class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]);                class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]);
783              continue;              continue;
784    
785              case ESC_s:              case ESC_s:
786              for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space];
787              continue;              continue;
788    
789              case ESC_S:              case ESC_S:
790              for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space];              for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space];
791              continue;              continue;
792    
793              default:              default:
# Line 762  for (;; ptr++) Line 819  for (;; ptr++)
819    
820          if (d == '\\')          if (d == '\\')
821            {            {
822            d = check_escape(&ptr, errorptr, *brackets, options, TRUE);            d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd);
823            if (d < 0)            if (d < 0)
824              {              {
825              if (d == -ESC_b) d = '\b'; else              if (d == -ESC_b) d = '\b'; else
# Line 784  for (;; ptr++) Line 841  for (;; ptr++)
841            class[c/8] |= (1 << (c&7));            class[c/8] |= (1 << (c&7));
842            if ((options & PCRE_CASELESS) != 0)            if ((options & PCRE_CASELESS) != 0)
843              {              {
844              int uc = pcre_fcc[c];           /* flip case */              int uc = cd->fcc[c];           /* flip case */
845              class[uc/8] |= (1 << (uc&7));              class[uc/8] |= (1 << (uc&7));
846              }              }
847            class_charcount++;                /* in case a one-char range */            class_charcount++;                /* in case a one-char range */
# Line 799  for (;; ptr++) Line 856  for (;; ptr++)
856        class [c/8] |= (1 << (c&7));        class [c/8] |= (1 << (c&7));
857        if ((options & PCRE_CASELESS) != 0)        if ((options & PCRE_CASELESS) != 0)
858          {          {
859          c = pcre_fcc[c];   /* flip case */          c = cd->fcc[c];   /* flip case */
860          class[c/8] |= (1 << (c&7));          class[c/8] |= (1 << (c&7));
861          }          }
862        class_charcount++;        class_charcount++;
# Line 846  for (;; ptr++) Line 903  for (;; ptr++)
903      /* Various kinds of repeat */      /* Various kinds of repeat */
904    
905      case '{':      case '{':
906      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR;
907      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr);      ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd);
908      if (*errorptr != NULL) goto FAILED;      if (*errorptr != NULL) goto FAILED;
909      goto REPEAT;      goto REPEAT;
910    
# Line 872  for (;; ptr++) Line 929  for (;; ptr++)
929        goto FAILED;        goto FAILED;
930        }        }
931    
932      /* If the next character is '?' this is a minimizing repeat. Advance to the      /* If the next character is '?' this is a minimizing repeat, by default,
933        but if PCRE_UNGREEDY is set, it works the other way round. Advance to the
934      next character. */      next character. */
935    
936      if (ptr[1] == '?') { repeat_type = 1; ptr++; } else repeat_type = 0;      if (ptr[1] == '?')
937          { repeat_type = greedy_non_default; ptr++; }
938        else repeat_type = greedy_default;
939    
940      /* If the maximum is zero then the minimum must also be zero; Perl allows      /* If the maximum is zero then the minimum must also be zero; Perl allows
941      this case, so we do too - by simply omitting the item altogether. */      this case, so we do too - by simply omitting the item altogether. */
# Line 920  for (;; ptr++) Line 980  for (;; ptr++)
980      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
981      repeats by adding a suitable offset into repeat_type. */      repeats by adding a suitable offset into repeat_type. */
982    
983      else if ((int)*previous < OP_EOD || *previous == OP_ANY)      else if ((int)*previous < OP_EODN || *previous == OP_ANY)
984        {        {
985        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
986        c = *previous;        c = *previous;
# Line 964  for (;; ptr++) Line 1024  for (;; ptr++)
1024          /* If the minimum is 1 and the previous item was a character string,          /* If the minimum is 1 and the previous item was a character string,
1025          we either have to put back the item that got cancelled if the string          we either have to put back the item that got cancelled if the string
1026          length was 1, or add the character back onto the end of a longer          length was 1, or add the character back onto the end of a longer
1027          string. For a character type nothing need be done; it will just get put          string. For a character type nothing need be done; it will just get
1028          back naturally. */          put back naturally. Note that the final character is always going to
1029            get added below. */
1030    
1031          else if (*previous == OP_CHARS)          else if (*previous == OP_CHARS)
1032            {            {
1033            if (code == previous) code += 2; else previous[1]++;            if (code == previous) code += 2; else previous[1]++;
1034            }            }
1035    
1036          /* Insert an UPTO if the max is greater than the min. */          /*  For a single negated character we also have to put back the
1037            item that got cancelled. */
1038    
1039            else if (*previous == OP_NOT) code++;
1040    
1041            /* If the maximum is unlimited, insert an OP_STAR. */
1042    
1043            if (repeat_max < 0)
1044              {
1045              *code++ = c;
1046              *code++ = OP_STAR + repeat_type;
1047              }
1048    
1049            /* Else insert an UPTO if the max is greater than the min. */
1050    
1051          if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
1052            {            {
1053            *code++ = c;            *code++ = c;
1054            repeat_max -= repeat_min;            repeat_max -= repeat_min;
# Line 1012  for (;; ptr++) Line 1086  for (;; ptr++)
1086        }        }
1087    
1088      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
1089      cases. If the maximum repeat count is unlimited, check that the bracket      cases. */
     group cannot match the empty string, and diagnose an error if it can. */  
1090    
1091      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1092                 (int)*previous == OP_COND)
1093        {        {
1094        int i;        register int i;
1095        int length = code - previous;        int ketoffset = 0;
1096          int len = code - previous;
1097          uschar *bralink = NULL;
1098    
1099          /* If the maximum repeat count is unlimited, find the end of the bracket
1100          by scanning through from the start, and compute the offset back to it
1101          from the current code pointer. There may be an OP_OPT setting following
1102          the final KET, so we can't find the end just by going back from the code
1103          pointer. */
1104    
1105          if (repeat_max == -1)
1106            {
1107            register uschar *ket = previous;
1108            do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1109            ketoffset = code - ket;
1110            }
1111    
1112          /* The case of a zero minimum is special because of the need to stick
1113          OP_BRAZERO in front of it, and because the group appears once in the
1114          data, whereas in other cases it appears the minimum number of times. For
1115          this reason, it is simplest to treat this case separately, as otherwise
1116          the code gets far too messy. There are several special subcases when the
1117          minimum is zero. */
1118    
1119        if (repeat_max == -1 && could_be_empty(previous))        if (repeat_min == 0)
1120          {          {
1121          *errorptr = ERR10;          /* If the maximum is also zero, we just omit the group from the output
1122          goto FAILED;          altogether. */
1123    
1124            if (repeat_max == 0)
1125              {
1126              code = previous;
1127              previous = NULL;
1128              break;
1129              }
1130    
1131            /* If the maximum is 1 or unlimited, we just have to stick in the
1132            BRAZERO and do no more at this point. */
1133    
1134            if (repeat_max <= 1)
1135              {
1136              memmove(previous+1, previous, len);
1137              code++;
1138              *previous++ = OP_BRAZERO + repeat_type;
1139              }
1140    
1141            /* If the maximum is greater than 1 and limited, we have to replicate
1142            in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1143            The first one has to be handled carefully because it's the original
1144            copy, which has to be moved up. The remainder can be handled by code
1145            that is common with the non-zero minimum case below. We just have to
1146            adjust the value of repeat_max, since one less copy is required. */
1147    
1148            else
1149              {
1150              int offset;
1151              memmove(previous+4, previous, len);
1152              code += 4;
1153              *previous++ = OP_BRAZERO + repeat_type;
1154              *previous++ = OP_BRA;
1155    
1156              /* We chain together the bracket offset fields that have to be
1157              filled in later when the ends of the brackets are reached. */
1158    
1159              offset = (bralink == NULL)? 0 : previous - bralink;
1160              bralink = previous;
1161              *previous++ = offset >> 8;
1162              *previous++ = offset & 255;
1163              }
1164    
1165            repeat_max--;
1166            }
1167    
1168          /* If the minimum is greater than zero, replicate the group as many
1169          times as necessary, and adjust the maximum to the number of subsequent
1170          copies that we need. */
1171    
1172          else
1173            {
1174            for (i = 1; i < repeat_min; i++)
1175              {
1176              memcpy(code, previous, len);
1177              code += len;
1178              }
1179            if (repeat_max > 0) repeat_max -= repeat_min;
1180            }
1181    
1182          /* This code is common to both the zero and non-zero minimum cases. If
1183          the maximum is limited, it replicates the group in a nested fashion,
1184          remembering the bracket starts on a stack. In the case of a zero minimum,
1185          the first one was set up above. In all cases the repeat_max now specifies
1186          the number of additional copies needed. */
1187    
1188          if (repeat_max >= 0)
1189            {
1190            for (i = repeat_max - 1; i >= 0; i--)
1191              {
1192              *code++ = OP_BRAZERO + repeat_type;
1193    
1194              /* All but the final copy start a new nesting, maintaining the
1195              chain of brackets outstanding. */
1196    
1197              if (i != 0)
1198                {
1199                int offset;
1200                *code++ = OP_BRA;
1201                offset = (bralink == NULL)? 0 : code - bralink;
1202                bralink = code;
1203                *code++ = offset >> 8;
1204                *code++ = offset & 255;
1205                }
1206    
1207              memcpy(code, previous, len);
1208              code += len;
1209              }
1210    
1211            /* Now chain through the pending brackets, and fill in their length
1212            fields (which are holding the chain links pro tem). */
1213    
1214            while (bralink != NULL)
1215              {
1216              int oldlinkoffset;
1217              int offset = code - bralink + 1;
1218              uschar *bra = code - offset;
1219              oldlinkoffset = (bra[1] << 8) + bra[2];
1220              bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
1221              *code++ = OP_KET;
1222              *code++ = bra[1] = offset >> 8;
1223              *code++ = bra[2] = (offset & 255);
1224              }
1225          }          }
1226    
1227          /* If the maximum is unlimited, set a repeater in the final copy. We
1228          can't just offset backwards from the current code point, because we
1229          don't know if there's been an options resetting after the ket. The
1230          correct offset was computed above. */
1231    
1232          else code[-ketoffset] = OP_KETRMAX + repeat_type;
1233    
1234    
1235    #ifdef NEVER
1236        /* If the minimum is greater than zero, and the maximum is unlimited or        /* If the minimum is greater than zero, and the maximum is unlimited or
1237        equal to the minimum, the first copy remains where it is, and is        equal to the minimum, the first copy remains where it is, and is
1238        replicated up to the minimum number of times. This case includes the +        replicated up to the minimum number of times. This case includes the +
# Line 1035  for (;; ptr++) Line 1242  for (;; ptr++)
1242          {          {
1243          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1244            {            {
1245            memcpy(code, previous, length);            memcpy(code, previous, len);
1246            code += length;            code += len;
1247            }            }
1248          }          }
1249    
# Line 1048  for (;; ptr++) Line 1255  for (;; ptr++)
1255          {          {
1256          if (repeat_min == 0)          if (repeat_min == 0)
1257            {            {
1258            memmove(previous+1, previous, length);            memmove(previous+1, previous, len);
1259            code++;            code++;
1260            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
1261            }            }
1262    
1263          for (i = 1; i < repeat_min; i++)          for (i = 1; i < repeat_min; i++)
1264            {            {
1265            memcpy(code, previous, length);            memcpy(code, previous, len);
1266            code += length;            code += len;
1267            }            }
1268    
1269          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)          for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)
1270            {            {
1271            *code++ = OP_BRAZERO + repeat_type;            *code++ = OP_BRAZERO + repeat_type;
1272            memcpy(code, previous, length);            memcpy(code, previous, len);
1273            code += length;            code += len;
1274            }            }
1275          }          }
1276    
1277        /* If the maximum is unlimited, set a repeater in the final copy. */        /* If the maximum is unlimited, set a repeater in the final copy. We
1278          can't just offset backwards from the current code point, because we
1279          don't know if there's been an options resetting after the ket. The
1280          correct offset was computed above. */
1281    
1282          if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;
1283    #endif
1284    
1285    
       if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;  
1286        }        }
1287    
1288      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1086  for (;; ptr++) Line 1299  for (;; ptr++)
1299      break;      break;
1300    
1301    
1302      /* Start of nested bracket sub-expression, or comment or lookahead.      /* Start of nested bracket sub-expression, or comment or lookahead or
1303      First deal with special things that can come after a bracket; all are      lookbehind or option setting or condition. First deal with special things
1304      introduced by ?, and the appearance of any of them means that this is not a      that can come after a bracket; all are introduced by ?, and the appearance
1305      referencing group. They were checked for validity in the first pass over      of any of them means that this is not a referencing group. They were
1306      the string, so we don't have to check for syntax errors here.  */      checked for validity in the first pass over the string, so we don't have to
1307        check for syntax errors here.  */
1308    
1309      case '(':      case '(':
1310      previous = code;              /* Only real brackets can be repeated */      newoptions = options;
1311        condref = -1;
1312    
1313      if (*(++ptr) == '?')      if (*(++ptr) == '?')
1314        {        {
1315        bravalue = OP_BRA;        int set, unset;
1316          int *optset;
1317    
1318        switch (*(++ptr))        switch (*(++ptr))
1319          {          {
1320          case '#':          case '#':                 /* Comment; skip to ket */
         case 'i':  
         case 'm':  
         case 's':  
         case 'x':  
1321          ptr++;          ptr++;
1322          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
         previous = NULL;  
1323          continue;          continue;
1324    
1325          case ':':                 /* Non-extracting bracket */          case ':':                 /* Non-extracting bracket */
1326            bravalue = OP_BRA;
1327          ptr++;          ptr++;
1328          break;          break;
1329    
1330          case '=':                 /* Assertions can't be repeated */          case '(':
1331            bravalue = OP_COND;       /* Conditional group */
1332            if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0)
1333              {
1334              condref = *ptr - '0';
1335              while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1336              ptr++;
1337              }
1338            else ptr--;
1339            break;
1340    
1341            case '=':                 /* Positive lookahead */
1342          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
1343          ptr++;          ptr++;
         previous = NULL;  
1344          break;          break;
1345    
1346          case '!':          case '!':                 /* Negative lookahead */
1347          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
1348          ptr++;          ptr++;
         previous = NULL;  
1349          break;          break;
1350    
1351          case '>':                         /* "Match once" brackets */          case '<':                 /* Lookbehinds */
1352          if ((options & PCRE_EXTRA) != 0)  /* Not yet standard */          switch (*(++ptr))
1353            {            {
1354            bravalue = OP_ONCE;            case '=':               /* Positive lookbehind */
1355              bravalue = OP_ASSERTBACK;
1356            ptr++;            ptr++;
           previous = NULL;  
1357            break;            break;
1358    
1359              case '!':               /* Negative lookbehind */
1360              bravalue = OP_ASSERTBACK_NOT;
1361              ptr++;
1362              break;
1363    
1364              default:                /* Syntax error */
1365              *errorptr = ERR24;
1366              goto FAILED;
1367            }            }
1368          /* Else fall through */          break;
1369    
1370          default:          case '>':                 /* One-time brackets */
1371          *errorptr = ERR12;          bravalue = OP_ONCE;
1372          goto FAILED;          ptr++;
1373            break;
1374    
1375            default:                  /* Option setting */
1376            set = unset = 0;
1377            optset = &set;
1378    
1379            while (*ptr != ')' && *ptr != ':')
1380              {
1381              switch (*ptr++)
1382                {
1383                case '-': optset = &unset; break;
1384    
1385                case 'i': *optset |= PCRE_CASELESS; break;
1386                case 'm': *optset |= PCRE_MULTILINE; break;
1387                case 's': *optset |= PCRE_DOTALL; break;
1388                case 'x': *optset |= PCRE_EXTENDED; break;
1389                case 'U': *optset |= PCRE_UNGREEDY; break;
1390                case 'X': *optset |= PCRE_EXTRA; break;
1391    
1392                default:
1393                *errorptr = ERR12;
1394                goto FAILED;
1395                }
1396              }
1397    
1398            /* Set up the changed option bits, but don't change anything yet. */
1399    
1400            newoptions = (options | set) & (~unset);
1401    
1402            /* If the options ended with ')' this is not the start of a nested
1403            group with option changes, so the options change at this level. At top
1404            level there is nothing else to be done (the options will in fact have
1405            been set from the start of compiling as a result of the first pass) but
1406            at an inner level we must compile code to change the ims options if
1407            necessary, and pass the new setting back so that it can be put at the
1408            start of any following branches, and when this group ends, a resetting
1409            item can be compiled. */
1410    
1411            if (*ptr == ')')
1412              {
1413              if ((options & PCRE_INGROUP) != 0 &&
1414                  (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1415                {
1416                *code++ = OP_OPT;
1417                *code++ = *optchanged = newoptions & PCRE_IMS;
1418                }
1419              options = newoptions;  /* Change options at this level */
1420              previous = NULL;       /* This item can't be repeated */
1421              continue;              /* It is complete */
1422              }
1423    
1424            /* If the options ended with ':' we are heading into a nested group
1425            with possible change of options. Such groups are non-capturing and are
1426            not assertions of any kind. All we need to do is skip over the ':';
1427            the newoptions value is handled below. */
1428    
1429            bravalue = OP_BRA;
1430            ptr++;
1431          }          }
1432        }        }
1433    
1434      /* Else we have a referencing group */      /* Else we have a referencing group; adjust the opcode. */
1435    
1436      else      else
1437        {        {
# Line 1154  for (;; ptr++) Line 1443  for (;; ptr++)
1443        bravalue = OP_BRA + *brackets;        bravalue = OP_BRA + *brackets;
1444        }        }
1445    
1446      /* Process nested bracketed re; at end pointer is on the bracket. We copy      /* Process nested bracketed re. Assertions may not be repeated, but other
1447      code into a non-register variable in order to be able to pass its address      kinds can be. We copy code into a non-register variable in order to be able
1448      because some compilers complain otherwise. */      to pass its address because some compilers complain otherwise. Pass in a
1449        new setting for the ims options if they have changed. */
1450    
1451        previous = (bravalue >= OP_ONCE)? code : NULL;
1452      *code = bravalue;      *code = bravalue;
1453        tempcode = code;
1454    
1455        if (!compile_regex(
1456             options | PCRE_INGROUP,       /* Set for all nested groups */
1457             ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1458               newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1459             brackets,                     /* Bracket level */
1460             &tempcode,                    /* Where to put code (updated) */
1461             &ptr,                         /* Input pointer (updated) */
1462             errorptr,                     /* Where to put an error message */
1463             (bravalue == OP_ASSERTBACK ||
1464              bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1465             condref,                      /* Condition reference number */
1466             cd))                          /* Tables block */
1467          goto FAILED;
1468    
1469        /* At the end of compiling, code is still pointing to the start of the
1470        group, while tempcode has been updated to point past the end of the group
1471        and any option resetting that may follow it. The pattern pointer (ptr)
1472        is on the bracket. */
1473    
1474        /* If this is a conditional bracket, check that there are no more than
1475        two branches in the group. */
1476    
1477        if (bravalue == OP_COND)
1478        {        {
1479        uschar *mcode = code;        int branchcount = 0;
1480        if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))        uschar *tc = code;
1481    
1482          do {
1483             branchcount++;
1484             tc += (tc[1] << 8) | tc[2];
1485             }
1486          while (*tc != OP_KET);
1487    
1488          if (branchcount > 2)
1489            {
1490            *errorptr = ERR27;
1491          goto FAILED;          goto FAILED;
1492        code = mcode;          }
1493        }        }
1494    
1495        /* Now update the main code pointer to the end of the group. */
1496    
1497        code = tempcode;
1498    
1499        /* Error if hit end of pattern */
1500    
1501      if (*ptr != ')')      if (*ptr != ')')
1502        {        {
1503        *errorptr = ERR14;        *errorptr = ERR14;
# Line 1178  for (;; ptr++) Line 1510  for (;; ptr++)
1510      for validity in the pre-compiling pass. */      for validity in the pre-compiling pass. */
1511    
1512      case '\\':      case '\\':
1513      oldptr = ptr;      tempptr = ptr;
1514      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1515    
1516      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
1517      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values. For the
# Line 1192  for (;; ptr++) Line 1524  for (;; ptr++)
1524        {        {
1525        if (-c >= ESC_REF)        if (-c >= ESC_REF)
1526          {          {
         int refnum = -c - ESC_REF;  
         if (*brackets < refnum)  
           {  
           *errorptr = ERR15;  
           goto FAILED;  
           }  
1527          previous = code;          previous = code;
1528          *code++ = OP_REF;          *code++ = OP_REF;
1529          *code++ = refnum;          *code++ = -c - ESC_REF;
1530          }          }
1531        else        else
1532          {          {
1533          previous = (-c > ESC_b && -c < ESC_X)? code : NULL;          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1534          *code++ = -c;          *code++ = -c;
1535          }          }
1536        continue;        continue;
1537        }        }
1538    
1539      /* Reset and fall through */      /* Data character: reset and fall through */
1540    
1541      ptr = oldptr;      ptr = tempptr;
1542      c = '\\';      c = '\\';
1543    
1544      /* Handle a run of data characters until a metacharacter is encountered.      /* Handle a run of data characters until a metacharacter is encountered.
# Line 1230  for (;; ptr++) Line 1556  for (;; ptr++)
1556        {        {
1557        if ((options & PCRE_EXTENDED) != 0)        if ((options & PCRE_EXTENDED) != 0)
1558          {          {
1559          if ((pcre_ctypes[c] & ctype_space) != 0) continue;          if ((cd->ctypes[c] & ctype_space) != 0) continue;
1560          if (c == '#')          if (c == '#')
1561            {            {
1562            while ((c = *(++ptr)) != 0 && c != '\n');            while ((c = *(++ptr)) != 0 && c != '\n');
# Line 1245  for (;; ptr++) Line 1571  for (;; ptr++)
1571    
1572        if (c == '\\')        if (c == '\\')
1573          {          {
1574          oldptr = ptr;          tempptr = ptr;
1575          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd);
1576          if (c < 0) { ptr = oldptr; break; }          if (c < 0) { ptr = tempptr; break; }
1577          }          }
1578    
1579        /* Ordinary character or single-char escape */        /* Ordinary character or single-char escape */
# Line 1258  for (;; ptr++) Line 1584  for (;; ptr++)
1584    
1585      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
1586    
1587      while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1588    
1589      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1590      the next state. */      the next state. */
1591    
1592      previous[1] = length;      previous[1] = length;
1593      ptr--;      if (length < 255) ptr--;
1594      break;      break;
1595      }      }
1596    }                   /* end of big loop */    }                   /* end of big loop */
# Line 1288  return FALSE; Line 1614  return FALSE;
1614  /* On entry, ptr is pointing past the bracket character, but on return  /* On entry, ptr is pointing past the bracket character, but on return
1615  it points to the closing bracket, or vertical bar, or end of string.  it points to the closing bracket, or vertical bar, or end of string.
1616  The code variable is pointing at the byte into which the BRA operator has been  The code variable is pointing at the byte into which the BRA operator has been
1617  stored.  stored. If the ims options are changed at the start (for a (?ims: group) or
1618    during any branch, we need to insert an OP_OPT item at the start of every
1619    following branch to ensure they get set correctly at run time, and also pass
1620    the new options into every subsequent branch compile.
1621    
1622  Argument:  Argument:
1623    options   the option bits    options     the option bits
1624    brackets  -> int containing the number of extracting brackets used    optchanged  new ims options to set as if (?ims) were at the start, or -1
1625    codeptr   -> the address of the current code pointer                 for no change
1626    ptrptr    -> the address of the current pattern pointer    brackets    -> int containing the number of extracting brackets used
1627    errorptr  -> pointer to error message    codeptr     -> the address of the current code pointer
1628      ptrptr      -> the address of the current pattern pointer
1629      errorptr    -> pointer to error message
1630      lookbehind  TRUE if this is a lookbehind assertion
1631      condref     > 0 for OP_CREF setting at start of conditional group
1632      cd          points to the data block with tables pointers
1633    
1634  Returns:    TRUE on success  Returns:      TRUE on success
1635  */  */
1636    
1637  static BOOL  static BOOL
1638  compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1639    char **errorptr)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1640      compile_data *cd)
1641  {  {
1642  uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1643  uschar *code = *codeptr;  uschar *code = *codeptr;
1644    uschar *last_branch = code;
1645  uschar *start_bracket = code;  uschar *start_bracket = code;
1646    uschar *reverse_count = NULL;
1647    int oldoptions = options & PCRE_IMS;
1648    
1649    code += 3;
1650    
1651    /* At the start of a reference-based conditional group, insert the reference
1652    number as an OP_CREF item. */
1653    
1654    if (condref > 0)
1655      {
1656      *code++ = OP_CREF;
1657      *code++ = condref;
1658      }
1659    
1660    /* Loop for each alternative branch */
1661    
1662  for (;;)  for (;;)
1663    {    {
1664    int length;    int length;
   uschar *last_branch = code;  
1665    
1666    code += 3;    /* Handle change of options */
1667    if (!compile_branch(options, brackets, &code, &ptr, errorptr))  
1668      if (optchanged >= 0)
1669        {
1670        *code++ = OP_OPT;
1671        *code++ = optchanged;
1672        options = (options & ~PCRE_IMS) | optchanged;
1673        }
1674    
1675      /* Set up dummy OP_REVERSE if lookbehind assertion */
1676    
1677      if (lookbehind)
1678        {
1679        *code++ = OP_REVERSE;
1680        reverse_count = code;
1681        *code++ = 0;
1682        *code++ = 0;
1683        }
1684    
1685      /* Now compile the branch */
1686    
1687      if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd))
1688      {      {
1689      *ptrptr = ptr;      *ptrptr = ptr;
1690      return FALSE;      return FALSE;
# Line 1326  for (;;) Line 1696  for (;;)
1696    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1697    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1698    
1699      /* If lookbehind, check that this branch matches a fixed-length string,
1700      and put the length into the OP_REVERSE item. Temporarily mark the end of
1701      the branch with OP_END. */
1702    
1703      if (lookbehind)
1704        {
1705        *code = OP_END;
1706        length = find_fixedlength(last_branch);
1707        DPRINTF(("fixed length = %d\n", length));
1708        if (length < 0)
1709          {
1710          *errorptr = ERR25;
1711          *ptrptr = ptr;
1712          return FALSE;
1713          }
1714        reverse_count[0] = (length >> 8);
1715        reverse_count[1] = length & 255;
1716        }
1717    
1718    /* Reached end of expression, either ')' or end of pattern. Insert a    /* Reached end of expression, either ')' or end of pattern. Insert a
1719    terminating ket and the length of the whole bracketed item, and return,    terminating ket and the length of the whole bracketed item, and return,
1720    leaving the pointer at the terminating char. */    leaving the pointer at the terminating char. If any of the ims options
1721      were changed inside the group, compile a resetting op-code following. */
1722    
1723    if (*ptr != '|')    if (*ptr != '|')
1724      {      {
# Line 1336  for (;;) Line 1726  for (;;)
1726      *code++ = OP_KET;      *code++ = OP_KET;
1727      *code++ = length >> 8;      *code++ = length >> 8;
1728      *code++ = length & 255;      *code++ = length & 255;
1729        if (optchanged >= 0)
1730          {
1731          *code++ = OP_OPT;
1732          *code++ = oldoptions;
1733          }
1734      *codeptr = code;      *codeptr = code;
1735      *ptrptr = ptr;      *ptrptr = ptr;
1736      return TRUE;      return TRUE;
# Line 1344  for (;;) Line 1739  for (;;)
1739    /* Another branch follows; insert an "or" node and advance the pointer. */    /* Another branch follows; insert an "or" node and advance the pointer. */
1740    
1741    *code = OP_ALT;    *code = OP_ALT;
1742      last_branch = code;
1743      code += 3;
1744    ptr++;    ptr++;
1745    }    }
1746  /* Control never reaches here */  /* Control never reaches here */
# Line 1351  for (;;) Line 1748  for (;;)
1748    
1749    
1750    
1751    
1752    /*************************************************
1753    *      Find first significant op code            *
1754    *************************************************/
1755    
1756    /* This is called by several functions that scan a compiled expression looking
1757    for a fixed first character, or an anchoring op code etc. It skips over things
1758    that do not influence this. For one application, a change of caseless option is
1759    important.
1760    
1761    Arguments:
1762      code       pointer to the start of the group
1763      options    pointer to external options
1764      optbit     the option bit whose changing is significant, or
1765                 zero if none are
1766      optstop    TRUE to return on option change, otherwise change the options
1767                   value and continue
1768    
1769    Returns:     pointer to the first significant opcode
1770    */
1771    
1772    static const uschar*
1773    first_significant_code(const uschar *code, int *options, int optbit,
1774      BOOL optstop)
1775    {
1776    for (;;)
1777      {
1778      switch ((int)*code)
1779        {
1780        case OP_OPT:
1781        if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1782          {
1783          if (optstop) return code;
1784          *options = (int)code[1];
1785          }
1786        code += 2;
1787        break;
1788    
1789        case OP_CREF:
1790        code += 2;
1791        break;
1792    
1793        case OP_WORD_BOUNDARY:
1794        case OP_NOT_WORD_BOUNDARY:
1795        code++;
1796        break;
1797    
1798        case OP_ASSERT_NOT:
1799        case OP_ASSERTBACK:
1800        case OP_ASSERTBACK_NOT:
1801        do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
1802        code += 3;
1803        break;
1804    
1805        default:
1806        return code;
1807        }
1808      }
1809    /* Control never reaches here */
1810    }
1811    
1812    
1813    
1814    
1815  /*************************************************  /*************************************************
1816  *          Check for anchored expression         *  *          Check for anchored expression         *
1817  *************************************************/  *************************************************/
# Line 1361  all of whose alternatives start with OP_ Line 1822  all of whose alternatives start with OP_
1822  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
1823  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
1824    
1825  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
1826  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
1827  trying them again.  so there is no point trying them again.
1828    
1829  Argument:  points to start of expression (the bracket)  Arguments:
1830  Returns:   TRUE or FALSE    code       points to start of expression (the bracket)
1831      options    points to the options setting
1832    
1833    Returns:     TRUE or FALSE
1834  */  */
1835    
1836  static BOOL  static BOOL
1837  is_anchored(register uschar *code, BOOL multiline)  is_anchored(register const uschar *code, int *options)
1838  {  {
1839  do {  do {
1840     int op = (int)code[3];     const uschar *scode = first_significant_code(code + 3, options,
1841     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)       PCRE_MULTILINE, FALSE);
1842       { if (!is_anchored(code+3, multiline)) return FALSE; }     register int op = *scode;
1843     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1844       { if (code[4] != OP_ANY) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
1845     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
1846                (*options & PCRE_DOTALL) != 0)
1847         { if (scode[1] != OP_ANY) return FALSE; }
1848       else if (op != OP_SOD &&
1849               ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
1850         return FALSE;
1851     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1852     }     }
1853  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1388  return TRUE; Line 1857  return TRUE;
1857    
1858    
1859  /*************************************************  /*************************************************
1860  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
1861  *************************************************/  *************************************************/
1862    
1863  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
1864  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
1865    matching and for non-DOTALL patterns that start with .* (which must start at
1866    the beginning or after \n).
1867    
1868  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
1869  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
1870  */  */
1871    
1872  static BOOL  static BOOL
1873  is_startline(uschar *code)  is_startline(const uschar *code)
1874  {  {
1875  do {  do {
1876     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
1877       { if (!is_startline(code+3)) return FALSE; }     register int op = *scode;
1878     else if (code[3] != OP_CIRC) return FALSE;     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1879         { if (!is_startline(scode)) return FALSE; }
1880       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1881         { if (scode[1] != OP_ANY) return FALSE; }
1882       else if (op != OP_CIRC) return FALSE;
1883     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1884     }     }
1885  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1423  Consider each alternative branch. If the Line 1898  Consider each alternative branch. If the
1898  a bracket all of whose alternatives start with the same char (recurse ad lib),  a bracket all of whose alternatives start with the same char (recurse ad lib),
1899  then we return that char, otherwise -1.  then we return that char, otherwise -1.
1900    
1901  Argument:  points to start of expression (the bracket)  Arguments:
1902  Returns:   -1 or the fixed first char    code       points to start of expression (the bracket)
1903      options    pointer to the options (used to check casing changes)
1904    
1905    Returns:     -1 or the fixed first char
1906  */  */
1907    
1908  static int  static int
1909  find_firstchar(uschar *code)  find_firstchar(const uschar *code, int *options)
1910  {  {
1911  register int c = -1;  register int c = -1;
1912  do  do {
1913    {     int d;
1914    register int charoffset = 4;     const uschar *scode = first_significant_code(code + 3, options,
1915         PCRE_CASELESS, TRUE);
1916    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     register int op = *scode;
1917      {  
1918      register int d;     if (op >= OP_BRA) op = OP_BRA;
1919      if ((d = find_firstchar(code+3)) < 0) return -1;  
1920      if (c < 0) c = d; else if (c != d) return -1;     switch(op)
1921      }       {
1922         default:
1923    else switch(code[3])       return -1;
1924      {  
1925      default:       case OP_BRA:
1926      return -1;       case OP_ASSERT:
1927         case OP_ONCE:
1928      case OP_EXACT:       /* Fall through */       case OP_COND:
1929      charoffset++;       if ((d = find_firstchar(scode, options)) < 0) return -1;
1930         if (c < 0) c = d; else if (c != d) return -1;
1931      case OP_CHARS:       /* Fall through */       break;
1932      charoffset++;  
1933         case OP_EXACT:       /* Fall through */
1934         scode++;
1935    
1936         case OP_CHARS:       /* Fall through */
1937         scode++;
1938    
1939         case OP_PLUS:
1940         case OP_MINPLUS:
1941         if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
1942         break;
1943         }
1944    
1945      case OP_PLUS:     code += (code[1] << 8) + code[2];
1946      case OP_MINPLUS:     }
     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;  
     break;  
     }  
   code += (code[1] << 8) + code[2];  
   }  
1947  while (*code == OP_ALT);  while (*code == OP_ALT);
1948  return c;  return c;
1949  }  }
1950    
1951    
1952    
1953    
1954    
1955  /*************************************************  /*************************************************
1956  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
1957  *************************************************/  *************************************************/
# Line 1478  Arguments: Line 1964  Arguments:
1964    options      various option bits    options      various option bits
1965    errorptr     pointer to pointer to error text    errorptr     pointer to pointer to error text
1966    erroroffset  ptr offset in pattern where error was detected    erroroffset  ptr offset in pattern where error was detected
1967      tables       pointer to character tables or NULL
1968    
1969  Returns:       pointer to compiled data block, or NULL on error,  Returns:       pointer to compiled data block, or NULL on error,
1970                 with errorptr and erroroffset set                 with errorptr and erroroffset set
1971  */  */
1972    
1973  pcre *  pcre *
1974  pcre_compile(const char *pattern, int options, char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
1975    int *erroroffset)    int *erroroffset, const unsigned char *tables)
1976  {  {
1977  real_pcre *re;  real_pcre *re;
 int spaces = 0;  
1978  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
1979  int runlength;  int runlength;
1980  int c, size;  int c, size;
1981  int bracount = 0;  int bracount = 0;
 int brastack[200];  
 int brastackptr = 0;  
1982  int top_backref = 0;  int top_backref = 0;
1983  uschar *code, *ptr;  int branch_extra = 0;
1984    int branch_newextra;
1985    unsigned int brastackptr = 0;
1986    uschar *code;
1987    const uschar *ptr;
1988    compile_data compile_block;
1989    int brastack[BRASTACK_SIZE];
1990    uschar bralenstack[BRASTACK_SIZE];
1991    
1992  #ifdef DEBUG  #ifdef DEBUG
1993  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1523  if ((options & ~PUBLIC_OPTIONS) != 0) Line 2014  if ((options & ~PUBLIC_OPTIONS) != 0)
2014    return NULL;    return NULL;
2015    }    }
2016    
2017  #ifdef DEBUG  /* Set up pointers to the individual character tables */
2018  printf("------------------------------------------------------------------\n");  
2019  printf("%s\n", pattern);  if (tables == NULL) tables = pcre_default_tables;
2020  #endif  compile_block.lcc = tables + lcc_offset;
2021    compile_block.fcc = tables + fcc_offset;
2022    compile_block.cbits = tables + cbits_offset;
2023    compile_block.ctypes = tables + ctypes_offset;
2024    
2025    /* Reflect pattern for debugging output */
2026    
2027    DPRINTF(("------------------------------------------------------------------\n"));
2028    DPRINTF(("%s\n", pattern));
2029    
2030  /* The first thing to do is to make a pass over the pattern to compute the  /* The first thing to do is to make a pass over the pattern to compute the
2031  amount of store required to hold the compiled code. This does not have to be  amount of store required to hold the compiled code. This does not have to be
# Line 1535  internal flag settings. Make an attempt Line 2034  internal flag settings. Make an attempt
2034  if an "extended" flag setting appears late in the pattern. We can't be so  if an "extended" flag setting appears late in the pattern. We can't be so
2035  clever for #-comments. */  clever for #-comments. */
2036    
2037  ptr = (uschar *)(pattern - 1);  ptr = (const uschar *)(pattern - 1);
2038  while ((c = *(++ptr)) != 0)  while ((c = *(++ptr)) != 0)
2039    {    {
2040    int min, max;    int min, max;
2041    int class_charcount;    int class_charcount;
2042    
2043    if ((pcre_ctypes[c] & ctype_space) != 0)    if ((options & PCRE_EXTENDED) != 0)
     {  
     if ((options & PCRE_EXTENDED) != 0) continue;  
     spaces++;  
     }  
   
   if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2044      {      {
2045      while ((c = *(++ptr)) != 0 && c != '\n');      if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2046      continue;      if (c == '#')
2047          {
2048          while ((c = *(++ptr)) != 0 && c != '\n');
2049          continue;
2050          }
2051      }      }
2052    
2053    switch(c)    switch(c)
# Line 1562  while ((c = *(++ptr)) != 0) Line 2059  while ((c = *(++ptr)) != 0)
2059    
2060      case '\\':      case '\\':
2061        {        {
2062        uschar *save_ptr = ptr;        const uschar *save_ptr = ptr;
2063        c = check_escape(&ptr, errorptr, bracount, options, FALSE);        c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block);
2064        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;        if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2065        if (c >= 0)        if (c >= 0)
2066          {          {
# Line 1583  while ((c = *(++ptr)) != 0) Line 2080  while ((c = *(++ptr)) != 0)
2080        int refnum = -c - ESC_REF;        int refnum = -c - ESC_REF;
2081        if (refnum > top_backref) top_backref = refnum;        if (refnum > top_backref) top_backref = refnum;
2082        length++;   /* For single back reference */        length++;   /* For single back reference */
2083        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2084          {          {
2085          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2086          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2087          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2088            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1609  while ((c = *(++ptr)) != 0) Line 2106  while ((c = *(++ptr)) != 0)
2106      or back reference. */      or back reference. */
2107    
2108      case '{':      case '{':
2109      if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR;      if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR;
2110      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr);      ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block);
2111      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;      if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2112      if ((min == 0 && (max == 1 || max == -1)) ||      if ((min == 0 && (max == 1 || max == -1)) ||
2113        (min == 1 && max == -1))        (min == 1 && max == -1))
# Line 1624  while ((c = *(++ptr)) != 0) Line 2121  while ((c = *(++ptr)) != 0)
2121      if (ptr[1] == '?') ptr++;      if (ptr[1] == '?') ptr++;
2122      continue;      continue;
2123    
2124      /* An alternation contains an offset to the next branch or ket. */      /* An alternation contains an offset to the next branch or ket. If any ims
2125        options changed in the previous branch(es), and/or if we are in a
2126        lookbehind assertion, extra space will be needed at the start of the
2127        branch. This is handled by branch_extra. */
2128    
2129      case '|':      case '|':
2130      length += 3;      length += 3 + branch_extra;
2131      continue;      continue;
2132    
2133      /* A character class uses 33 characters. Don't worry about character types      /* A character class uses 33 characters. Don't worry about character types
# Line 1641  while ((c = *(++ptr)) != 0) Line 2142  while ((c = *(++ptr)) != 0)
2142        {        {
2143        if (*ptr == '\\')        if (*ptr == '\\')
2144          {          {
2145          int c = check_escape(&ptr, errorptr, bracount, options, TRUE);          int ch = check_escape(&ptr, errorptr, bracount, options, TRUE,
2146              &compile_block);
2147          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2148          if (-c == ESC_b) class_charcount++; else class_charcount = 10;          if (-ch == ESC_b) class_charcount++; else class_charcount = 10;
2149          }          }
2150        else class_charcount++;        else class_charcount++;
2151        ptr++;        ptr++;
# Line 1658  while ((c = *(++ptr)) != 0) Line 2160  while ((c = *(++ptr)) != 0)
2160    
2161        /* A repeat needs either 1 or 5 bytes. */        /* A repeat needs either 1 or 5 bytes. */
2162    
2163        if (ptr[1] == '{' && is_counted_repeat(ptr+2))        if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block))
2164          {          {
2165          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block);
2166          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2167          if ((min == 0 && (max == 1 || max == -1)) ||          if ((min == 0 && (max == 1 || max == -1)) ||
2168            (min == 1 && max == -1))            (min == 1 && max == -1))
# Line 1674  while ((c = *(++ptr)) != 0) Line 2176  while ((c = *(++ptr)) != 0)
2176      /* Brackets may be genuine groups or special things */      /* Brackets may be genuine groups or special things */
2177    
2178      case '(':      case '(':
2179        branch_newextra = 0;
2180    
2181        /* Handle special forms of bracket, which all start (? */
2182    
2183        if (ptr[1] == '?')
2184          {
2185          int set, unset;
2186          int *optset;
2187    
2188          switch (c = ptr[2])
2189            {
2190            /* Skip over comments entirely */
2191            case '#':
2192            ptr += 3;
2193            while (*ptr != 0 && *ptr != ')') ptr++;
2194            if (*ptr == 0)
2195              {
2196              *errorptr = ERR18;
2197              goto PCRE_ERROR_RETURN;
2198              }
2199            continue;
2200    
2201            /* Non-referencing groups and lookaheads just move the pointer on, and
2202            then behave like a non-special bracket, except that they don't increment
2203            the count of extracting brackets. Ditto for the "once only" bracket,
2204            which is in Perl from version 5.005. */
2205    
2206            case ':':
2207            case '=':
2208            case '!':
2209            case '>':
2210            ptr += 2;
2211            break;
2212    
2213            /* Lookbehinds are in Perl from version 5.005 */
2214    
2215            case '<':
2216            if (ptr[3] == '=' || ptr[3] == '!')
2217              {
2218              ptr += 3;
2219              branch_newextra = 3;
2220              length += 3;         /* For the first branch */
2221              break;
2222              }
2223            *errorptr = ERR24;
2224            goto PCRE_ERROR_RETURN;
2225    
2226            /* Conditionals are in Perl from version 5.005. The bracket must either
2227            be followed by a number (for bracket reference) or by an assertion
2228            group. */
2229    
2230            case '(':
2231            if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0)
2232              {
2233              ptr += 4;
2234              length += 2;
2235              while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++;
2236              if (*ptr != ')')
2237                {
2238                *errorptr = ERR26;
2239                goto PCRE_ERROR_RETURN;
2240                }
2241              }
2242            else   /* An assertion must follow */
2243              {
2244              ptr++;   /* Can treat like ':' as far as spacing is concerned */
2245    
2246              if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)
2247                {
2248                ptr += 2;    /* To get right offset in message */
2249                *errorptr = ERR28;
2250                goto PCRE_ERROR_RETURN;
2251                }
2252              }
2253            break;
2254    
2255            /* Else loop checking valid options until ) is met. Anything else is an
2256            error. If we are without any brackets, i.e. at top level, the settings
2257            act as if specified in the options, so massage the options immediately.
2258            This is for backward compatibility with Perl 5.004. */
2259    
2260            default:
2261            set = unset = 0;
2262            optset = &set;
2263            ptr += 2;
2264    
2265            for (;; ptr++)
2266              {
2267              c = *ptr;
2268              switch (c)
2269                {
2270                case 'i':
2271                *optset |= PCRE_CASELESS;
2272                continue;
2273    
2274                case 'm':
2275                *optset |= PCRE_MULTILINE;
2276                continue;
2277    
2278                case 's':
2279                *optset |= PCRE_DOTALL;
2280                continue;
2281    
2282                case 'x':
2283                *optset |= PCRE_EXTENDED;
2284                continue;
2285    
2286                case 'X':
2287                *optset |= PCRE_EXTRA;
2288                continue;
2289    
2290                case 'U':
2291                *optset |= PCRE_UNGREEDY;
2292                continue;
2293    
2294      /* Handle special forms of bracket, which all start (? */              case '-':
2295                optset = &unset;
2296                continue;
2297    
2298      if (ptr[1] == '?') switch (c = ptr[2])              /* A termination by ')' indicates an options-setting-only item;
2299        {              this is global at top level; otherwise nothing is done here and
2300        /* Skip over comments entirely */              it is handled during the compiling process on a per-bracket-group
2301        case '#':              basis. */
       ptr += 3;  
       while (*ptr != 0 && *ptr != ')') ptr++;  
       if (*ptr == 0)  
         {  
         *errorptr = ERR18;  
         goto PCRE_ERROR_RETURN;  
         }  
       continue;  
2302    
2303        /* Non-referencing groups and lookaheads just move the pointer on, and              case ')':
2304        then behave like a non-special bracket, except that they don't increment              if (brastackptr == 0)
2305        the count of extracting brackets. */                {
2306                  options = (options | set) & (~unset);
2307        case ':':                set = unset = 0;     /* To save length */
2308        case '=':                }
2309        case '!':              /* Fall through */
       ptr += 2;  
       break;  
2310    
2311        /* Ditto for the "once only" bracket, allowed only if the extra bit              /* A termination by ':' indicates the start of a nested group with
2312        is set. */              the given options set. This is again handled at compile time, but
2313                we must allow for compiled space if any of the ims options are
2314                set. We also have to allow for resetting space at the end of
2315                the group, which is why 4 is added to the length and not just 2.
2316                If there are several changes of options within the same group, this
2317                will lead to an over-estimate on the length, but this shouldn't
2318                matter very much. We also have to allow for resetting options at
2319                the start of any alternations, which we do by setting
2320                branch_newextra to 2. */
2321    
2322        case '>':              case ':':
2323        if ((options & PCRE_EXTRA) != 0)              if (((set|unset) & PCRE_IMS) != 0)
2324          {                {
2325          ptr += 2;                length += 4;
2326          break;                branch_newextra = 2;
2327          }                }
2328        /* Else fall through */              goto END_OPTIONS;
2329    
2330        /* Else loop setting valid options until ) is met. Anything else is an              /* Unrecognized option character */
       error. */  
2331    
2332        default:              default:
2333        ptr += 2;              *errorptr = ERR12;
2334        for (;; ptr++)              goto PCRE_ERROR_RETURN;
2335          {              }
         if ((c = *ptr) == 'i')  
           {  
           options |= PCRE_CASELESS;  
           continue;  
           }  
         else if ((c = *ptr) == 'm')  
           {  
           options |= PCRE_MULTILINE;  
           continue;  
           }  
         else if (c == 's')  
           {  
           options |= PCRE_DOTALL;  
           continue;  
2336            }            }
2337          else if (c == 'x')  
2338            /* If we hit a closing bracket, that's it - this is a freestanding
2339            option-setting. We need to ensure that branch_extra is updated if
2340            necessary. The only values branch_newextra can have here are 0 or 2.
2341            If the value is 2, then branch_extra must either be 2 or 5, depending
2342            on whether this is a lookbehind group or not. */
2343    
2344            END_OPTIONS:
2345            if (c == ')')
2346            {            {
2347            options |= PCRE_EXTENDED;            if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2348            length -= spaces;          /* Already counted spaces */              branch_extra += branch_newextra;
2349            continue;            continue;
2350            }            }
         else if (c == ')') break;  
2351    
2352          *errorptr = ERR12;          /* If options were terminated by ':' control comes here. Fall through
2353          goto PCRE_ERROR_RETURN;          to handle the group below. */
2354          }          }
       continue;                      /* End of this bracket handling */  
2355        }        }
2356    
2357      /* Extracting brackets must be counted so we can process escapes in a      /* Extracting brackets must be counted so we can process escapes in a
# Line 1753  while ((c = *(++ptr)) != 0) Line 2360  while ((c = *(++ptr)) != 0)
2360      else bracount++;      else bracount++;
2361    
2362      /* Non-special forms of bracket. Save length for computing whole length      /* Non-special forms of bracket. Save length for computing whole length
2363      at end if there's a repeat that requires duplication of the group. */      at end if there's a repeat that requires duplication of the group. Also
2364        save the current value of branch_extra, and start the new group with
2365        the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2366        for a lookbehind assertion. */
2367    
2368      if (brastackptr >= sizeof(brastack)/sizeof(int))      if (brastackptr >= sizeof(brastack)/sizeof(int))
2369        {        {
# Line 1761  while ((c = *(++ptr)) != 0) Line 2371  while ((c = *(++ptr)) != 0)
2371        goto PCRE_ERROR_RETURN;        goto PCRE_ERROR_RETURN;
2372        }        }
2373    
2374        bralenstack[brastackptr] = branch_extra;
2375        branch_extra = branch_newextra;
2376    
2377      brastack[brastackptr++] = length;      brastack[brastackptr++] = length;
2378      length += 3;      length += 3;
2379      continue;      continue;
2380    
2381      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
2382      have to replicate this bracket up to that many times. */      have to replicate this bracket up to that many times. If brastackptr is
2383        0 this is an unmatched bracket which will generate an error, but take care
2384        not to try to access brastack[-1] when computing the length and restoring
2385        the branch_extra value. */
2386    
2387      case ')':      case ')':
2388      length += 3;      length += 3;
2389        {        {
2390        int min = 1;        int minval = 1;
2391        int max = 1;        int maxval = 1;
2392        int duplength = length - brastack[--brastackptr];        int duplength;
2393    
2394          if (brastackptr > 0)
2395            {
2396            duplength = length - brastack[--brastackptr];
2397            branch_extra = bralenstack[brastackptr];
2398            }
2399          else duplength = 0;
2400    
2401        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2402        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
2403    
2404        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2))        if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block))
2405          {          {
2406          ptr = read_repeat_counts(ptr+2, &min, &max, errorptr);          ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr,
2407              &compile_block);
2408          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2409          }          }
2410        else if (c == '*') { min = 0; max = -1; ptr++; }        else if (c == '*') { minval = 0; maxval = -1; ptr++; }
2411        else if (c == '+') { max = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2412        else if (c == '?') { min = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2413    
2414        /* If there is a minimum > 1 we have to replicate up to min-1 times; if        /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2415        there is a limited maximum we have to replicate up to max-1 times and        group, and if the maximum is greater than zero, we have to replicate
2416        allow for a BRAZERO item before each optional copy, as we also have to        maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2417        do before the first copy if the minimum is zero. */        bracket set - hence the 7. */
2418    
2419        if (min == 0) length++;        if (minval == 0)
2420          else if (min > 1) length += (min - 1) * duplength;          {
2421        if (max > min) length += (max - min) * (duplength + 1);          length++;
2422        }          if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2423            }
2424    
2425          /* When the minimum is greater than zero, we have to replicate up to
2426          minval-1 times, with no additions required in the copies. Then, if
2427          there is a limited maximum we have to replicate up to maxval-1 times
2428          allowing for a BRAZERO item before each optional copy and nesting
2429          brackets for all but one of the optional copies. */
2430    
2431          else
2432            {
2433            length += (minval - 1) * duplength;
2434            if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2435              length += (maxval - minval) * (duplength + 7) - 6;
2436            }
2437          }
2438      continue;      continue;
2439    
2440      /* Non-special character. For a run of such characters the length required      /* Non-special character. For a run of such characters the length required
# Line 1810  while ((c = *(++ptr)) != 0) Line 2448  while ((c = *(++ptr)) != 0)
2448      runlength = 0;      runlength = 0;
2449      do      do
2450        {        {
2451        if ((pcre_ctypes[c] & ctype_space) != 0)        if ((options & PCRE_EXTENDED) != 0)
         {  
         if ((options & PCRE_EXTENDED) != 0) continue;  
         spaces++;  
         }  
   
       if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2452          {          {
2453          while ((c = *(++ptr)) != 0 && c != '\n');          if ((compile_block.ctypes[c] & ctype_space) != 0) continue;
2454          continue;          if (c == '#')
2455              {
2456              while ((c = *(++ptr)) != 0 && c != '\n');
2457              continue;
2458              }
2459          }          }
2460    
2461        /* Backslash may introduce a data char or a metacharacter; stop the        /* Backslash may introduce a data char or a metacharacter; stop the
# Line 1827  while ((c = *(++ptr)) != 0) Line 2463  while ((c = *(++ptr)) != 0)
2463    
2464        if (c == '\\')        if (c == '\\')
2465          {          {
2466          uschar *saveptr = ptr;          const uschar *saveptr = ptr;
2467          c = check_escape(&ptr, errorptr, bracount, options, FALSE);          c = check_escape(&ptr, errorptr, bracount, options, FALSE,
2468              &compile_block);
2469          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;          if (*errorptr != NULL) goto PCRE_ERROR_RETURN;
2470          if (c < 0) { ptr = saveptr; break; }          if (c < 0) { ptr = saveptr; break; }
2471          }          }
# Line 1840  while ((c = *(++ptr)) != 0) Line 2477  while ((c = *(++ptr)) != 0)
2477    
2478      /* This "while" is the end of the "do" above. */      /* This "while" is the end of the "do" above. */
2479    
2480      while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (runlength < 255 &&
2481          (compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0);
2482    
2483      ptr--;      ptr--;
2484      length += runlength;      length += runlength;
# Line 1857  if (length > 65539) Line 2495  if (length > 65539)
2495    }    }
2496    
2497  /* Compute the size of data block needed and get it, either from malloc or  /* Compute the size of data block needed and get it, either from malloc or
2498  externally provided function. Put in the magic number and the options. */  externally provided function. We specify "code[0]" in the offsetof() expression
2499    rather than just "code", because it has been reported that one broken compiler
2500    fails on "code" because it is also an independent variable. It should make no
2501    difference to the value of the offsetof(). */
2502    
2503  size = length + offsetof(real_pcre, code);  size = length + offsetof(real_pcre, code[0]);
2504  re = (real_pcre *)(pcre_malloc)(size);  re = (real_pcre *)(pcre_malloc)(size);
2505    
2506  if (re == NULL)  if (re == NULL)
# Line 1868  if (re == NULL) Line 2509  if (re == NULL)
2509    return NULL;    return NULL;
2510    }    }
2511    
2512    /* Put in the magic number and the options. */
2513    
2514  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
2515  re->options = options;  re->options = options;
2516    re->tables = tables;
2517    
2518  /* Set up a starting, non-extracting bracket, then compile the expression. On  /* Set up a starting, non-extracting bracket, then compile the expression. On
2519  error, *errorptr will be set non-NULL, so we don't need to look at the result  error, *errorptr will be set non-NULL, so we don't need to look at the result
2520  of the function here. */  of the function here. */
2521    
2522  ptr = (uschar *)pattern;  ptr = (const uschar *)pattern;
2523  code = re->code;  code = re->code;
2524  *code = OP_BRA;  *code = OP_BRA;
2525  bracount = 0;  bracount = 0;
2526  (void)compile_regex(options, &bracount, &code, &ptr, errorptr);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2527      &compile_block);
2528  re->top_bracket = bracount;  re->top_bracket = bracount;
2529  re->top_backref = top_backref;  re->top_backref = top_backref;
2530    
# Line 1896  if debugging, leave the test till after Line 2541  if debugging, leave the test till after
2541  if (code - re->code > length) *errorptr = ERR23;  if (code - re->code > length) *errorptr = ERR23;
2542  #endif  #endif
2543    
2544    /* Give an error if there's a back reference to a non-existent capturing
2545    subpattern. */
2546    
2547    if (top_backref > re->top_bracket) *errorptr = ERR15;
2548    
2549  /* Failed to compile */  /* Failed to compile */
2550    
2551  if (*errorptr != NULL)  if (*errorptr != NULL)
2552    {    {
2553    (pcre_free)(re);    (pcre_free)(re);
2554    PCRE_ERROR_RETURN:    PCRE_ERROR_RETURN:
2555    *erroroffset = ptr - (uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
2556    return NULL;    return NULL;
2557    }    }
2558    
2559  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2560  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2561  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2562  unanchored matches no end. In the case of multiline matches, an alternative is  
2563  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2564    that speeds up unanchored matches no end. If not, see if we can set the
2565    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2566    start with ^, and also when all branches start with .* for non-DOTALL matches.
2567    */
2568    
2569  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2570    {    {
2571    if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))    int temp_options = options;
2572      if (is_anchored(re->code, &temp_options))
2573      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
2574    else    else
2575      {      {
2576      int c = find_firstchar(re->code);      int ch = find_firstchar(re->code, &temp_options);
2577      if (c >= 0)      if (ch >= 0)
2578        {        {
2579        re->first_char = c;        re->first_char = ch;
2580        re->options |= PCRE_FIRSTSET;        re->options |= PCRE_FIRSTSET;
2581        }        }
2582      else if (is_startline(re->code))      else if (is_startline(re->code))
# Line 1933  if ((options & PCRE_ANCHORED) == 0) Line 2588  if ((options & PCRE_ANCHORED) == 0)
2588    
2589  #ifdef DEBUG  #ifdef DEBUG
2590    
2591  printf("Length = %d top_bracket = %d top_backref=%d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
2592    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
2593    
2594  if (re->options != 0)  if (re->options != 0)
2595    {    {
2596    printf("%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s\n",
2597      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2598      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2599      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2600      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2601      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
2602      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",      ((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "",
2603      ((re->options & PCRE_EXTRA) != 0)? "extra " : "");      ((re->options & PCRE_EXTRA) != 0)? "extra " : "",
2604        ((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : "");
2605    }    }
2606    
2607  if ((re->options & PCRE_FIRSTSET) != 0)  if ((re->options & PCRE_FIRSTSET) != 0)
# Line 1971  while (code < code_end) Line 2627  while (code < code_end)
2627    
2628    else switch(*code)    else switch(*code)
2629      {      {
2630        case OP_OPT:
2631        printf(" %.2x %s", code[1], OP_names[*code]);
2632        code++;
2633        break;
2634    
2635        case OP_COND:
2636        printf("%3d Cond", (code[1] << 8) + code[2]);
2637        code += 2;
2638        break;
2639    
2640        case OP_CREF:
2641        printf(" %.2d %s", code[1], OP_names[*code]);
2642        code++;
2643        break;
2644    
2645      case OP_CHARS:      case OP_CHARS:
2646      charlength = *(++code);      charlength = *(++code);
2647      printf("%3d ", charlength);      printf("%3d ", charlength);
# Line 1984  while (code < code_end) Line 2655  while (code < code_end)
2655      case OP_KET:      case OP_KET:
2656      case OP_ASSERT:      case OP_ASSERT:
2657      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2658        case OP_ASSERTBACK:
2659        case OP_ASSERTBACK_NOT:
2660      case OP_ONCE:      case OP_ONCE:
2661      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2662      code += 2;      code += 2;
2663      break;      break;
2664    
2665        case OP_REVERSE:
2666        printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2667        code += 2;
2668        break;
2669    
2670      case OP_STAR:      case OP_STAR:
2671      case OP_MINSTAR:      case OP_MINSTAR:
2672      case OP_PLUS:      case OP_PLUS:
# Line 2013  while (code < code_end) Line 2691  while (code < code_end)
2691      case OP_MINUPTO:      case OP_MINUPTO:
2692      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2693        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2694      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2695      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2696      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2697      code += 3;      code += 3;
# Line 2058  while (code < code_end) Line 2736  while (code < code_end)
2736    
2737      case OP_REF:      case OP_REF:
2738      printf("    \\%d", *(++code));      printf("    \\%d", *(++code));
2739      break;      code ++;
2740        goto CLASS_REF_REPEAT;
2741    
2742      case OP_CLASS:      case OP_CLASS:
2743        {        {
2744        int i, min, max;        int i, min, max;
   
2745        code++;        code++;
2746        printf("    [");        printf("    [");
2747    
# Line 2088  while (code < code_end) Line 2766  while (code < code_end)
2766        printf("]");        printf("]");
2767        code += 32;        code += 32;
2768    
2769          CLASS_REF_REPEAT:
2770    
2771        switch(*code)        switch(*code)
2772          {          {
2773          case OP_CRSTAR:          case OP_CRSTAR:
# Line 2145  return (pcre *)re; Line 2825  return (pcre *)re;
2825    
2826    
2827  /*************************************************  /*************************************************
 *        Match a character type                  *  
 *************************************************/  
   
 /* Not used in all the places it might be as it's sometimes faster  
 to put the code inline.  
   
 Arguments:  
   type        the character type  
   c           the character  
   dotall      the dotall flag  
   
 Returns:      TRUE if character is of the type  
 */  
   
 static BOOL  
 match_type(int type, int c, BOOL dotall)  
 {  
   
 #ifdef DEBUG  
 if (isprint(c)) printf("matching subject %c against ", c);  
   else printf("matching subject \\x%02x against ", c);  
 printf("%s\n", OP_names[type]);  
 #endif  
   
 switch(type)  
   {  
   case OP_ANY:            return dotall || c != '\n';  
   case OP_NOT_DIGIT:      return (pcre_ctypes[c] & ctype_digit) == 0;  
   case OP_DIGIT:          return (pcre_ctypes[c] & ctype_digit) != 0;  
   case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0;  
   case OP_WHITESPACE:     return (pcre_ctypes[c] & ctype_space) != 0;  
   case OP_NOT_WORDCHAR:   return (pcre_ctypes[c] & ctype_word) == 0;  
   case OP_WORDCHAR:       return (pcre_ctypes[c] & ctype_word) != 0;  
   }  
 return FALSE;  
 }  
   
   
   
 /*************************************************  
2828  *          Match a back-reference                *  *          Match a back-reference                *
2829  *************************************************/  *************************************************/
2830    
2831  /* If a back reference hasn't been set, the match fails.  /* If a back reference hasn't been set, the length that is passed is greater
2832    than the number of characters left in the string, so the match fails.
2833    
2834  Arguments:  Arguments:
2835    number      reference number    offset      index into the offset vector
2836    eptr        points into the subject    eptr        points into the subject
2837    length      length to be matched    length      length to be matched
2838    md          points to match data block    md          points to match data block
2839      ims         the ims flags
2840    
2841  Returns:      TRUE if matched  Returns:      TRUE if matched
2842  */  */
2843    
2844  static BOOL  static BOOL
2845  match_ref(int number, register uschar *eptr, int length, match_data *md)  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2846      int ims)
2847  {  {
2848  uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[offset];
2849    
2850  #ifdef DEBUG  #ifdef DEBUG
2851  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2219  printf("\n"); Line 2862  printf("\n");
2862    
2863  /* Always fail if not enough characters left */  /* Always fail if not enough characters left */
2864    
2865  if (length > md->end_subject - p) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
2866    
2867  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
2868    
2869  if (md->caseless)  if ((ims & PCRE_CASELESS) != 0)
2870    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    {
2871      while (length-- > 0)
2872        if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE;
2873      }
2874  else  else
2875    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
2876    
# Line 2237  return TRUE; Line 2883  return TRUE;
2883  *         Match from current position            *  *         Match from current position            *
2884  *************************************************/  *************************************************/
2885    
2886  /* On entry ecode points to the first opcode, and eptr to the first character.  /* On entry ecode points to the first opcode, and eptr to the first character
2887    in the subject string, while eptrb holds the value of eptr at the start of the
2888    last bracketed group - used for breaking infinite loops matching zero-length
2889    strings.
2890    
2891  Arguments:  Arguments:
2892     eptr        pointer in subject     eptr        pointer in subject
2893     ecode       position in code     ecode       position in code
2894     offset_top  current top pointer     offset_top  current top pointer
2895     md          pointer to "static" info for the match     md          pointer to "static" info for the match
2896       ims         current /i, /m, and /s options
2897       condassert  TRUE if called to check a condition assertion
2898       eptrb       eptr at start of last bracket
2899    
2900  Returns:       TRUE if matched  Returns:       TRUE if matched
2901  */  */
2902    
2903  static BOOL  static BOOL
2904  match(register uschar *eptr, register uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode,
2905    match_data *md)    int offset_top, match_data *md, int ims, BOOL condassert, const uschar *eptrb)
2906  {  {
2907    int original_ims = ims;   /* Save for resetting on ')' */
2908    
2909  for (;;)  for (;;)
2910    {    {
2911      int op = (int)*ecode;
2912    int min, max, ctype;    int min, max, ctype;
2913    register int i;    register int i;
2914    register int c;    register int c;
2915    BOOL minimize;    BOOL minimize = FALSE;
2916    
2917    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening capturing bracket. If there is space in the offset vector, save
2918    match. We have to set the start offset if required and there is space    the current subject position in the working slot at the top of the vector. We
2919    in the offset vector so that it is available for subsequent back references    mustn't change the current values of the data slot, because they may be set
2920    if the bracket matches. However, if the bracket fails, we must put back the    from a previous iteration of this group, and be referred to by a reference
2921    previous value of both offsets in case they were set by a previous copy of    inside the group.
2922    the same bracket. Don't worry about setting the flag for the error case here;  
2923    that is handled in the code for KET. */    If the bracket fails to match, we need to restore this value and also the
2924      values of the final offsets, in case they were set by a previous iteration of
2925      the same bracket.
2926    
2927      If there isn't enough space in the offset vector, treat this as if it were a
2928      non-capturing bracket. Don't worry about setting the flag for the error case
2929      here; that is handled in the code for KET. */
2930    
2931    if ((int)*ecode >= OP_BRA)    if (op > OP_BRA)
2932      {      {
2933      int number = (*ecode - OP_BRA) << 1;      int number = op - OP_BRA;
2934      int save_offset1, save_offset2;      int offset = number << 1;
2935    
2936      #ifdef DEBUG  #ifdef DEBUG
2937      printf("start bracket %d\n", number/2);      printf("start bracket %d subject=", number);
2938      #endif      pchars(eptr, 16, TRUE, md);
2939        printf("\n");
2940    #endif
2941    
2942      if (number > 0 && number < md->offset_end)      if (offset < md->offset_max)
2943        {        {
2944        save_offset1 = md->offset_vector[number];        int save_offset1 = md->offset_vector[offset];
2945        save_offset2 = md->offset_vector[number+1];        int save_offset2 = md->offset_vector[offset+1];
2946        md->offset_vector[number] = eptr - md->start_subject;        int save_offset3 = md->offset_vector[md->offset_end - number];
2947    
2948          DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
2949          md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
2950    
2951          do
2952            {
2953            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2954            ecode += (ecode[1] << 8) + ecode[2];
2955            }
2956          while (*ecode == OP_ALT);
2957    
2958        #ifdef DEBUG        DPRINTF(("bracket %d failed\n", number));
2959        printf("saving %d %d\n", save_offset1, save_offset2);  
2960        #endif        md->offset_vector[offset] = save_offset1;
2961          md->offset_vector[offset+1] = save_offset2;
2962          md->offset_vector[md->offset_end - number] = save_offset3;
2963          return FALSE;
2964        }        }
2965    
2966      /* Recurse for all the alternatives. */      /* Insufficient room for saving captured contents */
2967    
2968        else op = OP_BRA;
2969        }
2970    
2971      /* Other types of node can be handled by a switch */
2972    
2973      switch(op)
2974        {
2975        case OP_BRA:     /* Non-capturing bracket: optimized */
2976        DPRINTF(("start bracket 0\n"));
2977      do      do
2978        {        {
2979        if (match(eptr, ecode+3, offset_top, md)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2980        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2981        }        }
2982      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2983        DPRINTF(("bracket 0 failed\n"));
2984        return FALSE;
2985    
2986      #ifdef DEBUG      /* Conditional group: compilation checked that there are no more than
2987      printf("bracket %d failed\n", number/2);      two branches. If the condition is false, skipping the first branch takes us
2988      #endif      past the end if there is only one branch, but that's OK because that is
2989        exactly what going to the ket would do. */
2990    
2991        case OP_COND:
2992        if (ecode[3] == OP_CREF)         /* Condition is extraction test */
2993          {
2994          int offset = ecode[4] << 1;    /* Doubled reference number */
2995          return match(eptr,
2996            ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
2997              5 : 3 + (ecode[1] << 8) + ecode[2]),
2998            offset_top, md, ims, FALSE, eptr);
2999          }
3000    
3001        /* The condition is an assertion. Call match() to evaluate it - setting
3002        the condassert argument TRUE causes it to stop at the end of an assertion. */
3003    
3004      if (number > 0 && number < md->offset_end)      else
3005        {        {
3006        md->offset_vector[number] = save_offset1;        if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))
3007        md->offset_vector[number+1] = save_offset2;          {
3008            ecode += 3 + (ecode[4] << 8) + ecode[5];
3009            while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
3010            }
3011          else ecode += (ecode[1] << 8) + ecode[2];
3012          return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);
3013        }        }
3014        /* Control never reaches here */
3015    
3016      return FALSE;      /* Skip over conditional reference data if encountered (should not be) */
     }  
3017    
3018    /* Other types of node can be handled by a switch */      case OP_CREF:
3019        ecode += 2;
3020        break;
3021    
3022        /* End of the pattern */
3023    
   switch(*ecode)  
     {  
3024      case OP_END:      case OP_END:
3025      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
3026      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
3027      return TRUE;      return TRUE;
3028    
3029      /* The equivalent of Prolog's "cut" - if the rest doesn't match, the      /* Change option settings */
     whole thing doesn't match, so we have to get out via a longjmp(). */  
3030    
3031      case OP_CUT:      case OP_OPT:
3032      if (match(eptr, ecode+1, offset_top, md)) return TRUE;      ims = ecode[1];
3033      longjmp(md->fail_env, 1);      ecode += 2;
3034        DPRINTF(("ims set to %02x\n", ims));
3035        break;
3036    
3037      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
3038      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
3039      the assertion is true. */      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
3040        start of each branch to move the current point backwards, so the code at
3041        this level is identical to the lookahead case. */
3042    
3043      case OP_ASSERT:      case OP_ASSERT:
3044        case OP_ASSERTBACK:
3045      do      do
3046        {        {
3047        if (match(eptr, ecode+3, offset_top, md)) break;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;
3048        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3049        }        }
3050      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3051      if (*ecode == OP_KET) return FALSE;      if (*ecode == OP_KET) return FALSE;
3052    
3053        /* If checking an assertion for a condition, return TRUE. */
3054    
3055        if (condassert) return TRUE;
3056    
3057      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
3058      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
3059    
# Line 2349  for (;;) Line 3065  for (;;)
3065      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match */
3066    
3067      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
3068        case OP_ASSERTBACK_NOT:
3069      do      do
3070        {        {
3071        if (match(eptr, ecode+3, offset_top, md)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;
3072        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
3073        }        }
3074      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
3075    
3076        if (condassert) return TRUE;
3077      ecode += 3;      ecode += 3;
3078      continue;      continue;
3079    
3080        /* Move the subject pointer back. This occurs only at the start of
3081        each branch of a lookbehind assertion. If we are too close to the start to
3082        move back, this match function fails. */
3083    
3084        case OP_REVERSE:
3085        eptr -= (ecode[1] << 8) + ecode[2];
3086        if (eptr < md->start_subject) return FALSE;
3087        ecode += 3;
3088        break;
3089    
3090    
3091      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
3092      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
3093      a back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
3094      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
3095      matches, we carry on, leaving the subject pointer. */      matches, we carry on as at the end of a normal bracket, leaving the subject
3096        pointer. */
3097    
3098      case OP_ONCE:      case OP_ONCE:
     do  
3099        {        {
3100        if (match(eptr, ecode+3, offset_top, md)) break;        const uschar *prev = ecode;
       ecode += (ecode[1] << 8) + ecode[2];  
       }  
     while (*ecode == OP_ALT);  
     if (*ecode == OP_KET) return FALSE;  
3101    
3102      /* Continue as from after the assertion, updating the offsets high water        do
3103      mark, since extracts may have been taken. */          {
3104            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;
3105            ecode += (ecode[1] << 8) + ecode[2];
3106            }
3107          while (*ecode == OP_ALT);
3108    
3109      do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);        /* If hit the end of the group (which could be repeated), fail */
3110      ecode += 3;  
3111      offset_top = md->end_offset_top;        if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
3112      eptr = md->end_match_ptr;  
3113      continue;        /* Continue as from after the assertion, updating the offsets high water
3114          mark, since extracts may have been taken. */
3115    
3116          do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
3117    
3118          offset_top = md->end_offset_top;
3119          eptr = md->end_match_ptr;
3120    
3121          /* For a non-repeating ket, just continue at this level. This also
3122          happens for a repeating ket if no characters were matched in the group.
3123          This is the forcible breaking of infinite loops as implemented in Perl
3124          5.005. If there is an options reset, it will get obeyed in the normal
3125          course of events. */
3126    
3127          if (*ecode == OP_KET || eptr == eptrb)
3128            {
3129            ecode += 3;
3130            break;
3131            }
3132    
3133          /* The repeating kets try the rest of the pattern or restart from the
3134          preceding bracket, in the appropriate order. We need to reset any options
3135          that changed within the bracket before re-running it, so check the next
3136          opcode. */
3137    
3138          if (ecode[3] == OP_OPT)
3139            {
3140            ims = (ims & ~PCRE_IMS) | ecode[4];
3141            DPRINTF(("ims set to %02x at group repeat\n", ims));
3142            }
3143    
3144          if (*ecode == OP_KETRMIN)
3145            {
3146            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3147                match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3148            }
3149          else  /* OP_KETRMAX */
3150            {
3151            if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3152                match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3153            }
3154          }
3155        return FALSE;
3156    
3157      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
3158      bracketed group and go to there. */      bracketed group and go to there. */
# Line 2397  for (;;) Line 3169  for (;;)
3169    
3170      case OP_BRAZERO:      case OP_BRAZERO:
3171        {        {
3172        uschar *next = ecode+1;        const uschar *next = ecode+1;
3173        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;
3174        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3175        ecode = next + 3;        ecode = next + 3;
3176        }        }
# Line 2406  for (;;) Line 3178  for (;;)
3178    
3179      case OP_BRAMINZERO:      case OP_BRAMINZERO:
3180        {        {
3181        uschar *next = ecode+1;        const uschar *next = ecode+1;
3182        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3183        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3184        ecode++;        ecode++;
3185        }        }
3186      break;;      break;
3187    
3188      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. If we are at the end of
3189      an assertion "group", stop matching and return TRUE, but record the      an assertion "group", stop matching and return TRUE, but record the
3190      current high water mark for use by positive assertions. */      current high water mark for use by positive assertions. Do this also
3191        for the "once" (non-backing-up) groups. */
3192    
3193      case OP_KET:      case OP_KET:
3194      case OP_KETRMIN:      case OP_KETRMIN:
3195      case OP_KETRMAX:      case OP_KETRMAX:
3196        {        {
3197        int number;        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
       uschar *prev = ecode - (ecode[1] << 8) - ecode[2];  
3198    
3199        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3200              *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3201              *prev == OP_ONCE)
3202          {          {
3203          md->end_match_ptr = eptr;      /* For ONCE */          md->end_match_ptr = eptr;      /* For ONCE */
3204          md->end_offset_top = offset_top;          md->end_offset_top = offset_top;
3205          return TRUE;          return TRUE;
3206          }          }
3207    
3208        /* In all other cases we have to check the group number back at the        /* In all other cases except a conditional group we have to check the
3209        start and if necessary complete handling an extraction by setting the        group number back at the start and if necessary complete handling an
3210        final offset and bumping the high water mark. */        extraction by setting the offsets and bumping the high water mark. */
3211    
3212        number = (*prev - OP_BRA) << 1;        if (*prev != OP_COND)
3213            {
3214            int number = *prev - OP_BRA;
3215            int offset = number << 1;
3216    
3217        #ifdef DEBUG          DPRINTF(("end bracket %d\n", number));
       printf("end bracket %d\n", number/2);  
       #endif  
3218    
3219        if (number > 0)          if (number > 0)
         {  
         if (number >= md->offset_end) md->offset_overflow = TRUE; else  
3220            {            {
3221            md->offset_vector[number+1] = eptr - md->start_subject;            if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3222            if (offset_top <= number) offset_top = number + 2;              {
3223                md->offset_vector[offset] =
3224                  md->offset_vector[md->offset_end - number];
3225                md->offset_vector[offset+1] = eptr - md->start_subject;
3226                if (offset_top <= offset) offset_top = offset + 2;
3227                }
3228            }            }
3229          }          }
3230    
3231        /* For a non-repeating ket, just advance to the next node and continue at        /* Reset the value of the ims flags, in case they got changed during
3232        this level. */        the group. */
3233    
3234          ims = original_ims;
3235          DPRINTF(("ims reset to %02x\n", ims));
3236    
3237        if (*ecode == OP_KET)        /* For a non-repeating ket, just continue at this level. This also
3238          happens for a repeating ket if no characters were matched in the group.
3239          This is the forcible breaking of infinite loops as implemented in Perl
3240          5.005. If there is an options reset, it will get obeyed in the normal
3241          course of events. */
3242    
3243          if (*ecode == OP_KET || eptr == eptrb)
3244          {          {
3245          ecode += 3;          ecode += 3;
3246          break;          break;
# Line 2464  for (;;) Line 3251  for (;;)
3251    
3252        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3253          {          {
3254          if (match(eptr, ecode+3, offset_top, md) ||          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3255              match(eptr, prev, offset_top, md)) return TRUE;              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3256          }          }
3257        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3258          {          {
3259          if (match(eptr, prev, offset_top, md) ||          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3260              match(eptr, ecode+3, offset_top, md)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3261          }          }
3262        }        }
3263      return FALSE;      return FALSE;
# Line 2479  for (;;) Line 3266  for (;;)
3266    
3267      case OP_CIRC:      case OP_CIRC:
3268      if (md->notbol && eptr == md->start_subject) return FALSE;      if (md->notbol && eptr == md->start_subject) return FALSE;
3269      if (md->multiline)      if ((ims & PCRE_MULTILINE) != 0)
3270        {        {
3271        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3272        ecode++;        ecode++;
# Line 2494  for (;;) Line 3281  for (;;)
3281      ecode++;      ecode++;
3282      break;      break;
3283    
3284      /* Assert before internal newline if multiline, or before      /* Assert before internal newline if multiline, or before a terminating
3285      a terminating newline unless endonly is set, else end of subject unless      newline unless endonly is set, else end of subject unless noteol is set. */
     noteol is set. */  
3286    
3287      case OP_DOLL:      case OP_DOLL:
3288      if (md->noteol && eptr >= md->end_subject) return FALSE;      if ((ims & PCRE_MULTILINE) != 0)
     if (md->multiline)  
3289        {        {
3290        if (eptr < md->end_subject && *eptr != '\n') return FALSE;        if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3291            else { if (md->noteol) return FALSE; }
3292        ecode++;        ecode++;
3293        break;        break;
3294        }        }
3295      else if (!md->endonly)      else
3296        {        {
3297        if (eptr < md->end_subject - 1 ||        if (md->noteol) return FALSE;
3298           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;        if (!md->endonly)
3299        ecode++;          {
3300        break;          if (eptr < md->end_subject - 1 ||
3301               (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3302    
3303            ecode++;
3304            break;
3305            }
3306        }        }
3307      /* ... else fall through */      /* ... else fall through */
3308    
3309      /* End of subject assertion */      /* End of subject assertion (\z) */
3310    
3311      case OP_EOD:      case OP_EOD:
3312      if (eptr < md->end_subject) return FALSE;      if (eptr < md->end_subject) return FALSE;
3313      ecode++;      ecode++;
3314      break;      break;
3315    
3316        /* End of subject or ending \n assertion (\Z) */
3317    
3318        case OP_EODN:
3319        if (eptr < md->end_subject - 1 ||
3320           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3321        ecode++;
3322        break;
3323    
3324      /* Word boundary assertions */      /* Word boundary assertions */
3325    
3326      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
3327      case OP_WORD_BOUNDARY:      case OP_WORD_BOUNDARY:
3328        {        {
3329        BOOL prev_is_word = (eptr != md->start_subject) &&        BOOL prev_is_word = (eptr != md->start_subject) &&
3330          ((pcre_ctypes[eptr[-1]] & ctype_word) != 0);          ((md->ctypes[eptr[-1]] & ctype_word) != 0);
3331        BOOL cur_is_word = (eptr < md->end_subject) &&        BOOL cur_is_word = (eptr < md->end_subject) &&
3332          ((pcre_ctypes[*eptr] & ctype_word) != 0);          ((md->ctypes[*eptr] & ctype_word) != 0);
3333        if ((*ecode++ == OP_WORD_BOUNDARY)?        if ((*ecode++ == OP_WORD_BOUNDARY)?
3334             cur_is_word == prev_is_word : cur_is_word != prev_is_word)             cur_is_word == prev_is_word : cur_is_word != prev_is_word)
3335          return FALSE;          return FALSE;
# Line 2540  for (;;) Line 3339  for (;;)
3339      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
3340    
3341      case OP_ANY:      case OP_ANY:
3342      if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3343          return FALSE;
3344      if (eptr++ >= md->end_subject) return FALSE;      if (eptr++ >= md->end_subject) return FALSE;
3345      ecode++;      ecode++;
3346      break;      break;
3347    
3348      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
3349      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0)      if (eptr >= md->end_subject ||
3350           (md->ctypes[*eptr++] & ctype_digit) != 0)
3351        return FALSE;        return FALSE;
3352      ecode++;      ecode++;
3353      break;      break;
3354    
3355      case OP_DIGIT:      case OP_DIGIT:
3356      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0)      if (eptr >= md->end_subject ||
3357           (md->ctypes[*eptr++] & ctype_digit) == 0)
3358        return FALSE;        return FALSE;
3359      ecode++;      ecode++;
3360      break;      break;
3361    
3362      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
3363      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0)      if (eptr >= md->end_subject ||
3364           (md->ctypes[*eptr++] & ctype_space) != 0)
3365        return FALSE;        return FALSE;
3366      ecode++;      ecode++;
3367      break;      break;
3368    
3369      case OP_WHITESPACE:      case OP_WHITESPACE:
3370      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0)      if (eptr >= md->end_subject ||
3371           (md->ctypes[*eptr++] & ctype_space) == 0)
3372        return FALSE;        return FALSE;
3373      ecode++;      ecode++;
3374      break;      break;
3375    
3376      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
3377      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0)      if (eptr >= md->end_subject ||
3378           (md->ctypes[*eptr++] & ctype_word) != 0)
3379        return FALSE;        return FALSE;
3380      ecode++;      ecode++;
3381      break;      break;
3382    
3383      case OP_WORDCHAR:      case OP_WORDCHAR:
3384      if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0)      if (eptr >= md->end_subject ||
3385           (md->ctypes[*eptr++] & ctype_word) == 0)
3386        return FALSE;        return FALSE;
3387      ecode++;      ecode++;
3388      break;      break;
# Line 2592  for (;;) Line 3398  for (;;)
3398      case OP_REF:      case OP_REF:
3399        {        {
3400        int length;        int length;
3401        int number = ecode[1] << 1;                /* Doubled reference number */        int offset = ecode[1] << 1;                /* Doubled reference number */
3402        ecode += 2;                                /* Advance past the item */        ecode += 2;                                /* Advance past the item */
3403    
3404        if (number >= offset_top || md->offset_vector[number] < 0)        /* If the reference is unset, set the length to be longer than the amount
3405          {        of subject left; this ensures that every attempt at a match fails. We
3406          md->errorcode = PCRE_ERROR_BADREF;        can't just fail here, because of the possibility of quantifiers with zero
3407          return FALSE;        minima. */
3408          }  
3409          length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3410            md->end_subject - eptr + 1 :
3411            md->offset_vector[offset+1] - md->offset_vector[offset];
3412    
3413        length = md->offset_vector[number+1] - md->offset_vector[number];        /* Set up for repetition, or handle the non-repeated case */
3414    
3415        switch (*ecode)        switch (*ecode)
3416          {          {
# Line 2628  for (;;) Line 3437  for (;;)
3437          break;          break;
3438    
3439          default:               /* No repeat follows */          default:               /* No repeat follows */
3440          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3441          eptr += length;          eptr += length;
3442          continue;              /* With the main loop */          continue;              /* With the main loop */
3443          }          }
# Line 2644  for (;;) Line 3453  for (;;)
3453    
3454        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3455          {          {
3456          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3457          eptr += length;          eptr += length;
3458          }          }
3459    
# Line 2659  for (;;) Line 3468  for (;;)
3468          {          {
3469          for (i = min;; i++)          for (i = min;; i++)
3470            {            {
3471            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3472            if (i >= max || !match_ref(number, eptr, length, md))              return TRUE;
3473              if (i >= max || !match_ref(offset, eptr, length, md, ims))
3474              return FALSE;              return FALSE;
3475            eptr += length;            eptr += length;
3476            }            }
# Line 2671  for (;;) Line 3481  for (;;)
3481    
3482        else        else
3483          {          {
3484          uschar *pp = eptr;          const uschar *pp = eptr;
3485          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3486            {            {
3487            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
3488            eptr += length;            eptr += length;
3489            }            }
3490          while (eptr >= pp)          while (eptr >= pp)
3491            {            {
3492            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3493                return TRUE;
3494            eptr -= length;            eptr -= length;
3495            }            }
3496          return FALSE;          return FALSE;
# Line 2687  for (;;) Line 3498  for (;;)
3498        }        }
3499      /* Control never gets here */      /* Control never gets here */
3500    
3501    
3502    
3503      /* Match a character class, possibly repeatedly. Look past the end of the      /* Match a character class, possibly repeatedly. Look past the end of the
3504      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
3505      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. */
     matching was set at runtime but not at compile time, we have to check both  
     versions of a character. */  
3506    
3507      case OP_CLASS:      case OP_CLASS:
3508        {        {
3509        uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
3510        ecode += 33;               /* Advance past the item */        ecode += 33;                     /* Advance past the item */
3511    
3512        switch (*ecode)        switch (*ecode)
3513          {          {
# Line 2723  for (;;) Line 3534  for (;;)
3534          break;          break;
3535    
3536          default:               /* No repeat follows */          default:               /* No repeat follows */
3537          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
3538          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
3539          }          }
3540    
3541        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2741  for (;;) Line 3545  for (;;)
3545          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
3546          c = *eptr++;          c = *eptr++;
3547          if ((data[c/8] & (1 << (c&7))) != 0) continue;          if ((data[c/8] & (1 << (c&7))) != 0) continue;
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           }  
3548          return FALSE;          return FALSE;
3549          }          }
3550    
# Line 2761  for (;;) Line 3560  for (;;)
3560          {          {
3561          for (i = min;; i++)          for (i = min;; i++)
3562            {            {
3563            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3564                return TRUE;
3565            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3566            c = *eptr++;            c = *eptr++;
3567            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
3568            return FALSE;            return FALSE;
3569            }            }
3570          /* Control never gets here */          /* Control never gets here */
# Line 2779  for (;;) Line 3574  for (;;)
3574    
3575        else        else
3576          {          {
3577          uschar *pp = eptr;          const uschar *pp = eptr;
3578          for (i = min; i < max; eptr++, i++)          for (i = min; i < max; eptr++, i++)
3579            {            {
3580            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3581            c = *eptr;            c = *eptr;
3582            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
3583            break;            break;
3584            }            }
3585    
3586          while (eptr >= pp)          while (eptr >= pp)
3587            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3588                return TRUE;
3589          return FALSE;          return FALSE;
3590          }          }
3591        }        }
# Line 2807  for (;;) Line 3598  for (;;)
3598        register int length = ecode[1];        register int length = ecode[1];
3599        ecode += 2;        ecode += 2;
3600    
3601        #ifdef DEBUG  #ifdef DEBUG    /* Sigh. Some compilers never learn. */
3602        if (eptr >= md->end_subject)        if (eptr >= md->end_subject)
3603          printf("matching subject <null> against pattern ");          printf("matching subject <null> against pattern ");
3604        else        else
# Line 2818  for (;;) Line 3609  for (;;)
3609          }          }
3610        pchars(ecode, length, FALSE, md);        pchars(ecode, length, FALSE, md);
3611        printf("\n");        printf("\n");
3612        #endif  #endif
3613    
3614        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
3615        if (md->caseless)        if ((ims & PCRE_CASELESS) != 0)
3616          {          {
3617          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0)
3618              if (md->lcc[*ecode++] != md->lcc[*eptr++])
3619                return FALSE;
3620          }          }
3621        else        else
3622          {          {
# Line 2875  for (;;) Line 3668  for (;;)
3668      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3669      characters and work backwards. */      characters and work backwards. */
3670    
3671      #ifdef DEBUG      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3672      printf("matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3673    
3674      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3675        {        {
3676        c = pcre_lcc[c];        c = md->lcc[c];
3677        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3678            if (c != md->lcc[*eptr++]) return FALSE;
3679        if (min == max) continue;        if (min == max) continue;
3680        if (minimize)        if (minimize)
3681          {          {
3682          for (i = min;; i++)          for (i = min;; i++)
3683            {            {
3684            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3685            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])              return TRUE;
3686              if (i >= max || eptr >= md->end_subject ||
3687                  c != md->lcc[*eptr++])
3688              return FALSE;              return FALSE;
3689            }            }
3690          /* Control never gets here */          /* Control never gets here */
3691          }          }
3692        else        else
3693          {          {
3694          uschar *pp = eptr;          const uschar *pp = eptr;
3695          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3696            {            {
3697            if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c != md->lcc[*eptr]) break;
3698            eptr++;            eptr++;
3699            }            }
3700          while (eptr >= pp)          while (eptr >= pp)
3701            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3702                return TRUE;
3703          return FALSE;          return FALSE;
3704          }          }
3705        /* Control never gets here */        /* Control never gets here */
# Line 2920  for (;;) Line 3715  for (;;)
3715          {          {
3716          for (i = min;; i++)          for (i = min;; i++)
3717            {            {
3718            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3719                return TRUE;
3720            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3721            }            }
3722          /* Control never gets here */          /* Control never gets here */
3723          }          }
3724        else        else
3725          {          {
3726          uschar *pp = eptr;          const uschar *pp = eptr;
3727          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3728            {            {
3729            if (eptr >= md->end_subject || c != *eptr) break;            if (eptr >= md->end_subject || c != *eptr) break;
3730            eptr++;            eptr++;
3731            }            }
3732          while (eptr >= pp)          while (eptr >= pp)
3733           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3734               return TRUE;
3735          return FALSE;          return FALSE;
3736          }          }
3737        }        }
# Line 2943  for (;;) Line 3740  for (;;)
3740      /* Match a negated single character */      /* Match a negated single character */
3741    
3742      case OP_NOT:      case OP_NOT:
3743      if (eptr > md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3744      ecode++;      ecode++;
3745      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3746        {        {
3747        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE;
3748        }        }
3749      else      else
3750        {        {
# Line 3002  for (;;) Line 3799  for (;;)
3799      maximum. Alternatively, if maximizing, find the maximum number of      maximum. Alternatively, if maximizing, find the maximum number of
3800      characters and work backwards. */      characters and work backwards. */
3801    
3802      #ifdef DEBUG      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3803      printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,        max, eptr));
       max, eptr);  
     #endif  
3804    
3805      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3806        {        {
3807        c = pcre_lcc[c];        c = md->lcc[c];
3808        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++)
3809            if (c == md->lcc[*eptr++]) return FALSE;
3810        if (min == max) continue;        if (min == max) continue;
3811        if (minimize)        if (minimize)
3812          {          {
3813          for (i = min;; i++)          for (i = min;; i++)
3814            {            {
3815            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3816            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])              return TRUE;
3817              if (i >= max || eptr >= md->end_subject ||
3818                  c == md->lcc[*eptr++])
3819              return FALSE;              return FALSE;
3820            }            }
3821          /* Control never gets here */          /* Control never gets here */
3822          }          }
3823        else        else
3824          {          {
3825          uschar *pp = eptr;          const uschar *pp = eptr;
3826          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3827            {            {
3828            if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break;            if (eptr >= md->end_subject || c == md->lcc[*eptr]) break;
3829            eptr++;            eptr++;
3830            }            }
3831          while (eptr >= pp)          while (eptr >= pp)
3832            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3833                return TRUE;
3834          return FALSE;          return FALSE;
3835          }          }
3836        /* Control never gets here */        /* Control never gets here */
# Line 3047  for (;;) Line 3846  for (;;)
3846          {          {
3847          for (i = min;; i++)          for (i = min;; i++)
3848            {            {
3849            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3850                return TRUE;
3851            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3852            }            }
3853          /* Control never gets here */          /* Control never gets here */
3854          }          }
3855        else        else
3856          {          {
3857          uschar *pp = eptr;          const uschar *pp = eptr;
3858          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3859            {            {
3860            if (eptr >= md->end_subject || c == *eptr) break;            if (eptr >= md->end_subject || c == *eptr) break;
3861            eptr++;            eptr++;
3862            }            }
3863          while (eptr >= pp)          while (eptr >= pp)
3864           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3865               return TRUE;
3866          return FALSE;          return FALSE;
3867          }          }
3868        }        }
# Line 3111  for (;;) Line 3912  for (;;)
3912      if (min > 0) switch(ctype)      if (min > 0) switch(ctype)
3913        {        {
3914        case OP_ANY:        case OP_ANY:
3915        if (!md->dotall)        if ((ims & PCRE_DOTALL) == 0)
3916          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3917        else eptr += min;        else eptr += min;
3918        break;        break;
3919    
3920        case OP_NOT_DIGIT:        case OP_NOT_DIGIT:
3921        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3922          if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE;
3923        break;        break;
3924    
3925        case OP_DIGIT:        case OP_DIGIT:
3926        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3927          if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE;
3928        break;        break;
3929    
3930        case OP_NOT_WHITESPACE:        case OP_NOT_WHITESPACE:
3931        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3932          if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE;
3933        break;        break;
3934    
3935        case OP_WHITESPACE:        case OP_WHITESPACE:
3936        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3937          if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE;          if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE;
3938        break;        break;
3939    
3940        case OP_NOT_WORDCHAR:        case OP_NOT_WORDCHAR:
3941        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0)        for (i = 1; i <= min; i++)
3942          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) != 0)
3943              return FALSE;
3944        break;        break;
3945    
3946        case OP_WORDCHAR:        case OP_WORDCHAR:
3947        for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0)        for (i = 1; i <= min; i++)
3948          return FALSE;          if ((md->ctypes[*eptr++] & ctype_word) == 0)
3949              return FALSE;
3950        break;        break;
3951        }        }
3952    
# Line 3152  for (;;) Line 3955  for (;;)
3955      if (min == max) continue;      if (min == max) continue;
3956    
3957      /* If minimizing, we have to test the rest of the pattern before each      /* If minimizing, we have to test the rest of the pattern before each
3958      subsequent match, so inlining isn't much help; just use the function. */      subsequent match. */
3959    
3960      if (minimize)      if (minimize)
3961        {        {
3962        for (i = min;; i++)        for (i = min;; i++)
3963          {          {
3964          if (match(eptr, ecode, offset_top, md)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
3965          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject) return FALSE;
3966            !match_type(ctype, *eptr++, md->dotall))  
3967              return FALSE;          c = *eptr++;
3968            switch(ctype)
3969              {
3970              case OP_ANY:
3971              if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE;
3972              break;
3973    
3974              case OP_NOT_DIGIT:
3975              if ((md->ctypes[c] & ctype_digit) != 0) return FALSE;
3976              break;
3977    
3978              case OP_DIGIT:
3979              if ((md->ctypes[c] & ctype_digit) == 0) return FALSE;
3980              break;
3981    
3982              case OP_NOT_WHITESPACE:
3983              if ((md->ctypes[c] & ctype_space) != 0) return FALSE;
3984              break;
3985    
3986              case OP_WHITESPACE:
3987              if  ((md->ctypes[c] & ctype_space) == 0) return FALSE;
3988              break;
3989    
3990              case OP_NOT_WORDCHAR:
3991              if ((md->ctypes[c] & ctype_word) != 0) return FALSE;
3992              break;
3993    
3994              case OP_WORDCHAR:
3995              if ((md->ctypes[c] & ctype_word) == 0) return FALSE;
3996              break;
3997              }
3998          }          }
3999        /* Control never gets here */        /* Control never gets here */
4000        }        }
# Line 3171  for (;;) Line 4004  for (;;)
4004    
4005      else      else
4006        {        {
4007        uschar *pp = eptr;        const uschar *pp = eptr;
4008        switch(ctype)        switch(ctype)
4009          {          {
4010          case OP_ANY:          case OP_ANY:
4011          if (!md->dotall)          if ((ims & PCRE_DOTALL) == 0)
4012            {            {
4013            for (i = min; i < max; i++)            for (i = min; i < max; i++)
4014              {              {
# Line 3194  for (;;) Line 4027  for (;;)
4027          case OP_NOT_DIGIT:          case OP_NOT_DIGIT:
4028          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4029            {            {
4030            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0)
4031              break;              break;
4032            eptr++;            eptr++;
4033            }            }
# Line 3203  for (;;) Line 4036  for (;;)
4036          case OP_DIGIT:          case OP_DIGIT:
4037          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4038            {            {
4039            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0)
4040              break;              break;
4041            eptr++;            eptr++;
4042            }            }
# Line 3212  for (;;) Line 4045  for (;;)
4045          case OP_NOT_WHITESPACE:          case OP_NOT_WHITESPACE:
4046          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4047            {            {
4048            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0)
4049              break;              break;
4050            eptr++;            eptr++;
4051            }            }
# Line 3221  for (;;) Line 4054  for (;;)
4054          case OP_WHITESPACE:          case OP_WHITESPACE:
4055          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4056            {            {
4057            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0)
4058              break;              break;
4059            eptr++;            eptr++;
4060            }            }
# Line 3230  for (;;) Line 4063  for (;;)
4063          case OP_NOT_WORDCHAR:          case OP_NOT_WORDCHAR:
4064          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4065            {            {
4066            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0)
4067              break;              break;
4068            eptr++;            eptr++;
4069            }            }
# Line 3239  for (;;) Line 4072  for (;;)
4072          case OP_WORDCHAR:          case OP_WORDCHAR:
4073          for (i = min; i < max; i++)          for (i = min; i < max; i++)
4074            {            {
4075            if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0)            if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0)
4076              break;              break;
4077            eptr++;            eptr++;
4078            }            }
# Line 3247  for (;;) Line 4080  for (;;)
4080          }          }
4081    
4082        while (eptr >= pp)        while (eptr >= pp)
4083          if (match(eptr--, ecode, offset_top, md)) return TRUE;          if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
4084              return TRUE;