/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 21 by nigel, Sat Feb 24 21:38:37 2007 UTC revision 23 by nigel, Sat Feb 24 21:38:41 2007 UTC
# Line 33  restrictions: Line 33  restrictions:
33    
34  /* #define DEBUG */  /* #define DEBUG */
35    
36  /* Use a macro for debugging printing, 'cause that eliminates the the use  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
37  of #ifdef inline, and there are *still* stupid compilers about that don't like  inline, and there are *still* stupid compilers about that don't like indented
38  indented pre-processor statements. I suppose it's only been 10 years... */  pre-processor statements. I suppose it's only been 10 years... */
39    
40  #ifdef DEBUG  #ifdef DEBUG
41  #define DPRINTF(p) printf p  #define DPRINTF(p) printf p
# Line 56  the external pcre header. */ Line 56  the external pcre header. */
56  #endif  #endif
57    
58    
59    /* Number of items on the nested bracket stacks at compile time. This should
60    not be set greater than 200. */
61    
62    #define BRASTACK_SIZE 200
63    
64    
65  /* Min and max values for the common repeats; for the maxima, 0 => infinity */  /* Min and max values for the common repeats; for the maxima, 0 => infinity */
66    
67  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static const char rep_min[] = { 0, 0, 1, 1, 0, 0 };
# Line 66  static const char rep_max[] = { 0, 0, 0, Line 72  static const char rep_max[] = { 0, 0, 0,
72  #ifdef DEBUG  #ifdef DEBUG
73  static const char *OP_names[] = {  static const char *OP_names[] = {
74    "End", "\\A", "\\B", "\\b", "\\D", "\\d",    "End", "\\A", "\\B", "\\b", "\\D", "\\d",
75    "\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars",    "\\S", "\\s", "\\W", "\\w", "\\Z", "\\z",
76    "not",    "Opt", "^", "$", "Any", "chars", "not",
77    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
78    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
79    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
80    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
81    "class", "negclass", "Ref",    "class", "Ref",
82    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
83      "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
84    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
85  };  };
86  #endif  #endif
# Line 93  static const short int escapes[] = { Line 100  static const short int escapes[] = {
100    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */    '`',      7, -ESC_b,      0, -ESC_d,     27,   '\f',      0,   /* ` - g */
101      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */      0,      0,      0,      0,      0,      0,   '\n',      0,   /* h - o */
102      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */      0,      0,   '\r', -ESC_s,   '\t',      0,      0, -ESC_w,   /* p - w */
103      0,      0,      0                                            /* x - z */      0,      0, -ESC_z                                            /* x - z */
104  };  };
105    
106  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
107    
108  static BOOL  static BOOL
109    compile_regex(int, int *, uschar **, const uschar **, const char **);    compile_regex(int, int, int *, uschar **, const uschar **, const char **,
110        BOOL, int);
111    
112  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
113  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 108  typedef struct match_data { Line 116  typedef struct match_data {
116    int    errorcode;             /* As it says */    int    errorcode;             /* As it says */
117    int   *offset_vector;         /* Offset vector */    int   *offset_vector;         /* Offset vector */
118    int    offset_end;            /* One past the end */    int    offset_end;            /* One past the end */
119      int    offset_max;            /* The maximum usable for return data */
120    BOOL   offset_overflow;       /* Set if too many extractions */    BOOL   offset_overflow;       /* Set if too many extractions */
   BOOL   caseless;              /* Case-independent flag */  
   BOOL   runtime_caseless;      /* Caseless forced at run time */  
   BOOL   multiline;             /* Multiline flag */  
121    BOOL   notbol;                /* NOTBOL flag */    BOOL   notbol;                /* NOTBOL flag */
122    BOOL   noteol;                /* NOTEOL flag */    BOOL   noteol;                /* NOTEOL flag */
   BOOL   dotall;                /* Dot matches any char */  
123    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
124    const uschar *start_subject;  /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
125    const uschar *end_subject;    /* End of the subject string */    const uschar *end_subject;    /* End of the subject string */
   jmp_buf fail_env;             /* Environment for longjump() break out */  
126    const uschar *end_match_ptr;  /* Subject position at end match */    const uschar *end_match_ptr;  /* Subject position at end match */
127    int     end_offset_top;       /* Highwater mark at end of match */    int     end_offset_top;       /* Highwater mark at end of match */
128  } match_data;  } match_data;
# Line 218  while (length-- > 0) Line 222  while (length-- > 0)
222    
223    
224  /*************************************************  /*************************************************
 *         Check subpattern for empty operand     *  
 *************************************************/  
   
 /* This function checks a bracketed subpattern to see if any of the paths  
 through it could match an empty string. This is used to diagnose an error if  
 such a subpattern is followed by a quantifier with an unlimited upper bound.  
   
 Argument:  
   code      points to the opening bracket  
   
 Returns:    TRUE or FALSE  
 */  
   
 static BOOL  
 could_be_empty(uschar *code)  
 {  
 do {  
   uschar *cc = code + 3;  /* step over the opcode byte and the two offset bytes read at NEXT_BRANCH */
   
   /* Scan along the opcodes for this branch; as soon as we find something  
   that matches a non-empty string, break out and advance to test the next  
   branch. If we get to the end of the branch, return TRUE for the whole  
   sub-expression. */  
   
   for (;;)  
     {  
     /* Test an embedded subpattern; if it could not be empty, break the  
     loop. Otherwise carry on in the branch. */  
   
     if ((int)(*cc) >= OP_BRA || (int)(*cc) == OP_ONCE)  
       {  
       if (!could_be_empty(cc)) break;  /* leaves the for(;;) and lands on NEXT_BRANCH below */
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  /* follow each 16-bit big-endian link until the item reached is not OP_ALT */
       cc += 3;  /* step over the 3-byte item that ended the chain (presumably the closing ket) */
       }  
   
     else switch (*cc)  
       {  
       /* Reached end of a branch: the subpattern may match the empty string */  
   
       case OP_ALT:  
       case OP_KET:  
       case OP_KETRMAX:  
       case OP_KETRMIN:  
       return TRUE;  
   
       /* Skip over entire bracket groups with zero lower bound */  
   
       case OP_BRAZERO:  
       case OP_BRAMINZERO:  
       cc++;  /* one byte for the zero-repeat marker; the skip code below then reads cc[1], cc[2] as a link */
       /* Fall through */  
   
       /* Skip over assertive subpatterns */  
   
       case OP_ASSERT:  
       case OP_ASSERT_NOT:  
       do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  /* same link-following walk as for nested groups above */
       cc += 3;  
       break;  
   
       /* Skip over things that don't match chars */  
   
       case OP_SOD:  
       case OP_EOD:  
       case OP_CIRC:  
       case OP_DOLL:  
       case OP_NOT_WORD_BOUNDARY:  
       case OP_WORD_BOUNDARY:  
       cc++;  
       break;  
   
       /* Skip over simple repeats with zero lower bound */  
   
       case OP_STAR:  
       case OP_MINSTAR:  
       case OP_QUERY:  
       case OP_MINQUERY:  
       case OP_NOTSTAR:  
       case OP_NOTMINSTAR:  
       case OP_NOTQUERY:  
       case OP_NOTMINQUERY:  
       case OP_TYPESTAR:  
       case OP_TYPEMINSTAR:  
       case OP_TYPEQUERY:  
       case OP_TYPEMINQUERY:  
       cc += 2;  /* opcode plus one operand byte */
       break;  
   
       /* Skip over UPTOs (lower bound is zero) */  
   
       case OP_UPTO:  
       case OP_MINUPTO:  
       case OP_TYPEUPTO:  
       case OP_TYPEMINUPTO:  
       cc += 4;  /* opcode plus three further bytes (presumably a 2-byte count and 1 operand byte) */
       break;  
   
       /* Check a class or a back reference for a zero minimum */  
   
       case OP_CLASS:  
       case OP_NEGCLASS:  
       case OP_REF:  
       cc += (*cc == OP_REF)? 2 : 33;  /* OP_REF item is 2 bytes; class items are 33 (opcode + presumably a 32-byte bit map) */
   
       switch (*cc)  /* cc is now on whatever follows the class/reference: a repeat opcode, if any */
         {  
         case OP_CRSTAR:  
         case OP_CRMINSTAR:  
         case OP_CRQUERY:  
         case OP_CRMINQUERY:  
         cc++;  
         break;  
   
         case OP_CRRANGE:  
         case OP_CRMINRANGE:  
         if ((cc[1] << 8) + cc[2] != 0) goto NEXT_BRANCH;  /* non-zero 16-bit value after the opcode: this branch cannot be empty */
         cc += 3;  /* NOTE(review): find_fixedlength reads cc[3], cc[4] too and advances by 5 for these opcodes; 3 here looks inconsistent - confirm */
         break;  
   
         default:  
         goto NEXT_BRANCH;  /* no zero-minimum repeat follows, so the item must match something */
         }  
       break;  
   
       /* Anything else matches at least one character */  
   
       default:  
       goto NEXT_BRANCH;  
       }  
     }  
   
   NEXT_BRANCH:  
   code += (code[1] << 8) + code[2];  /* advance to the next alternative (or the end of the group) via the 16-bit link */
   }  
 while (*code == OP_ALT);  
   
 /* No branches match the empty string */  
   
 return FALSE;  
 }  
   
   
   
 /*************************************************  
225  *            Handle escapes                      *  *            Handle escapes                      *
226  *************************************************/  *************************************************/
227    
# Line 493  else Line 352  else
352      default:      default:
353      if ((options & PCRE_EXTRA) != 0) switch(c)      if ((options & PCRE_EXTRA) != 0) switch(c)
354        {        {
       case 'X':  
       c = -ESC_X;      /* This could be a lookup if it ever got into Perl */  
       break;  
   
355        default:        default:
356        *errorptr = ERR3;        *errorptr = ERR3;
357        break;        break;
# Line 600  return p; Line 455  return p;
455    
456    
457  /*************************************************  /*************************************************
458    *        Find the fixed length of a pattern      *
459    *************************************************/
460    
461    /* Scan a pattern and compute the fixed length of subject that will match it,
462    if the length is fixed. This is needed for dealing with backward assertions.
463    
464    Arguments:
465      code     points to the start of the pattern (the bracket)
466    
467    Returns:   the fixed length, or -1 if there is no fixed length
468    */
469    
470    static int
471    find_fixedlength(uschar *code)
472    {
473    int length = -1;  /* negative until the first branch has been measured */
474    
475    register int branchlength = 0;
476    register uschar *cc = code + 3;  /* step over the opening opcode byte and its two link bytes */
477    
478    /* Scan along the opcodes for this branch. If we get to the end of the
479    branch, check the length against that of the other branches. */
480    
481    for (;;)
482      {
483      int d;
484      register int op = *cc;
485      if (op >= OP_BRA) op = OP_BRA;  /* every opcode at or above OP_BRA shares the OP_BRA case below */
486    
487      switch (op)
488        {
489        case OP_BRA:
490        case OP_ONCE:
491        case OP_COND:
492        d = find_fixedlength(cc);  /* recurse: cc is on the nested group's opening opcode */
493        if (d < 0) return -1;  /* nested group is variable length, so this pattern is too */
494        branchlength += d;
495        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  /* follow the 16-bit big-endian links past every alternative */
496        cc += 3;  /* step over the 3-byte item that ended the chain (presumably the closing ket) */
497        break;
498    
499        /* Reached end of a branch; if it's a ket it is the end of a nested
500        call. If it's ALT it is an alternation in a nested call. If it is
501        END it's the end of the outer call. All can be handled by the same code. */
502    
503        case OP_ALT:
504        case OP_KET:
505        case OP_KETRMAX:
506        case OP_KETRMIN:
507        case OP_END:
508        if (length < 0) length = branchlength;  /* first branch: record its length */
509          else if (length != branchlength) return -1;  /* branches differ in length: not fixed */
510        if (*cc != OP_ALT) return length;  /* anything but ALT ends the group; all branches agreed */
511        cc += 3;  /* step over OP_ALT and its two link bytes to start the next branch */
512        branchlength = 0;
513        break;
514    
515        /* Skip over assertive subpatterns */
516    
517        case OP_ASSERT:
518        case OP_ASSERT_NOT:
519        case OP_ASSERTBACK:
520        case OP_ASSERTBACK_NOT:
521        do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT);  /* assertions add nothing to branchlength */
522        cc += 3;
523        break;
524    
525        /* Skip over things that don't match chars */
526    
527        case OP_REVERSE:
528        cc++;  /* no break: falls through, so OP_REVERSE advances cc by 3 in total */
529    
530        case OP_CREF:
531        case OP_OPT:
532        cc++;  /* with the increment below, OP_CREF and OP_OPT advance cc by 2 in total */
533        /* Fall through */
534    
535        case OP_SOD:
536        case OP_EOD:
537        case OP_EODN:
538        case OP_CIRC:
539        case OP_DOLL:
540        case OP_NOT_WORD_BOUNDARY:
541        case OP_WORD_BOUNDARY:
542        cc++;
543        break;
544    
545        /* Handle char strings */
546    
547        case OP_CHARS:
548        branchlength += *(++cc);  /* the byte after OP_CHARS is the character count */
549        cc += *cc + 1;  /* step over the count byte and that many characters */
550        break;
551    
552        /* Handle exact repetitions */
553    
554        case OP_EXACT:
555        case OP_TYPEEXACT:
556        branchlength += (cc[1] << 8) + cc[2];  /* 16-bit big-endian repeat count */
557        cc += 4;  /* opcode, two count bytes, and one further byte (presumably the char or type) */
558        break;
559    
560        /* Handle single-char matchers */
561    
562        case OP_NOT_DIGIT:
563        case OP_DIGIT:
564        case OP_NOT_WHITESPACE:
565        case OP_WHITESPACE:
566        case OP_NOT_WORDCHAR:
567        case OP_WORDCHAR:
568        case OP_ANY:
569        branchlength++;
570        cc++;
571        break;
572    
573    
574        /* Check a class for variable quantification */
575    
576        case OP_CLASS:
577        cc += (*cc == OP_REF)? 2 : 33;  /* only OP_CLASS reaches here, so this presumably always adds 33; the OP_REF test looks vestigial - confirm */
578    
579        switch (*cc)  /* cc is now on whatever follows the class: a repeat opcode, if any */
580          {
581          case OP_CRSTAR:
582          case OP_CRMINSTAR:
583          case OP_CRQUERY:
584          case OP_CRMINQUERY:
585          return -1;
586    
587          case OP_CRRANGE:
588          case OP_CRMINRANGE:
589          if ((cc[1] << 8) + cc[2] != (cc[3] << 8) + cc[4]) return -1;  /* the two 16-bit values (presumably min and max) must be equal */
590          branchlength += (cc[1] << 8) + cc[2];
591          cc += 5;  /* opcode plus the two 16-bit values */
592          break;
593    
594          default:
595          branchlength++;  /* no repeat opcode follows: counts as one character; cc is left on the next item */
596          }
597        break;
598    
599        /* Anything else is variable length */
600    
601        default:
602        return -1;
603        }
604      }
605    /* Control never gets here */
606    }
607    
608    
609    
610    
611    /*************************************************
612  *           Compile one branch                   *  *           Compile one branch                   *
613  *************************************************/  *************************************************/
614    
615  /* Scan the pattern, compiling it into the code vector.  /* Scan the pattern, compiling it into the code vector.
616    
617  Arguments:  Arguments:
618    options    the option bits    options     the option bits
619    bracket    points to number of brackets used    brackets    points to number of brackets used
620    code       points to the pointer to the current code point    code        points to the pointer to the current code point
621    ptrptr     points to the current pattern pointer    ptrptr      points to the current pattern pointer
622    errorptr   points to pointer to error message    errorptr    points to pointer to error message
623      optchanged  set to the value of the last OP_OPT item compiled
624    
625  Returns:     TRUE on success  Returns:      TRUE on success
626               FALSE, with *errorptr set on error                FALSE, with *errorptr set on error
627  */  */
628    
629  static BOOL  static BOOL
630  compile_branch(int options, int *brackets, uschar **codeptr,  compile_branch(int options, int *brackets, uschar **codeptr,
631    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, int *optchanged)
632  {  {
633  int repeat_type, op_type;  int repeat_type, op_type;
634  int repeat_min, repeat_max;  int repeat_min, repeat_max;
# Line 626  int bravalue, length; Line 636  int bravalue, length;
636  int greedy_default, greedy_non_default;  int greedy_default, greedy_non_default;
637  register int c;  register int c;
638  register uschar *code = *codeptr;  register uschar *code = *codeptr;
639    uschar *tempcode;
640  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
641  const uschar *oldptr;  const uschar *tempptr;
642  uschar *previous = NULL;  uschar *previous = NULL;
643  uschar class[32];  uschar class[32];
644    
# Line 641  greedy_non_default = greedy_default ^ 1; Line 652  greedy_non_default = greedy_default ^ 1;
652  for (;; ptr++)  for (;; ptr++)
653    {    {
654    BOOL negate_class;    BOOL negate_class;
655    int  class_charcount;    int class_charcount;
656    int  class_lastchar;    int class_lastchar;
657      int newoptions;
658      int condref;
659    
660    c = *ptr;    c = *ptr;
661    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
# Line 690  for (;; ptr++) Line 703  for (;; ptr++)
703    
704      case '[':      case '[':
705      previous = code;      previous = code;
706        *code++ = OP_CLASS;
707    
708      /* If the first character is '^', set the negation flag, and use a      /* If the first character is '^', set the negation flag and skip it. */
     different opcode. This only matters if caseless matching is specified at  
     runtime. */  
709    
710      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
711        {        {
712        negate_class = TRUE;        negate_class = TRUE;
       *code++ = OP_NEGCLASS;  
713        c = *(++ptr);        c = *(++ptr);
714        }        }
715      else      else negate_class = FALSE;
       {  
       negate_class = FALSE;  
       *code++ = OP_CLASS;  
       }  
716    
717      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
718      character. */      character. */
# Line 964  for (;; ptr++) Line 971  for (;; ptr++)
971      create a suitable repeat item. The code is shared with single-character      create a suitable repeat item. The code is shared with single-character
972      repeats by adding a suitable offset into repeat_type. */      repeats by adding a suitable offset into repeat_type. */
973    
974      else if ((int)*previous < OP_EOD || *previous == OP_ANY)      else if ((int)*previous < OP_EODN || *previous == OP_ANY)
975        {        {
976        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */        op_type = OP_TYPESTAR - OP_STAR;  /* Use type opcodes */
977        c = *previous;        c = *previous;
# Line 1050  for (;; ptr++) Line 1057  for (;; ptr++)
1057      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1058      stuff after it. */      stuff after it. */
1059    
1060      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||      else if (*previous == OP_CLASS || *previous == OP_REF)
              *previous == OP_REF)  
1061        {        {
1062        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1063          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 1071  for (;; ptr++) Line 1077  for (;; ptr++)
1077        }        }
1078    
1079      /* If previous was a bracket group, we may have to replicate it in certain      /* If previous was a bracket group, we may have to replicate it in certain
1080      cases. If the maximum repeat count is unlimited, check that the bracket      cases. */
     group cannot match the empty string, and diagnose an error if it can. */  
1081    
1082      else if ((int)*previous >= OP_BRA)      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1083                 (int)*previous == OP_COND)
1084        {        {
1085        int i;        int i, ketoffset = 0;
1086        int len = code - previous;        int len = code - previous;
1087    
1088        if (repeat_max == -1 && could_be_empty(previous))        /* If the maximum repeat count is unlimited, find the end of the bracket
1089          {        by scanning through from the start, and compute the offset back to it
1090          *errorptr = ERR10;        from the current code pointer. There may be an OP_OPT setting following
1091          goto FAILED;        the final KET, so we can't find the end just by going back from the code
1092          pointer. */
1093    
1094          if (repeat_max == -1)
1095            {
1096            register uschar *ket = previous;
1097            do ket += (ket[1] << 8) + ket[2]; while (*ket != OP_KET);
1098            ketoffset = code - ket;
1099          }          }
1100    
1101        /* If the minimum is greater than zero, and the maximum is unlimited or        /* If the minimum is greater than zero, and the maximum is unlimited or
# Line 1126  for (;; ptr++) Line 1139  for (;; ptr++)
1139            }            }
1140          }          }
1141    
1142        /* If the maximum is unlimited, set a repeater in the final copy. */        /* If the maximum is unlimited, set a repeater in the final copy. We
1143          can't just offset backwards from the current code point, because we
1144          don't know if there's been an options resetting after the ket. The
1145          correct offset was computed above. */
1146    
1147        if (repeat_max == -1) code[-3] = OP_KETRMAX + repeat_type;        if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;
1148        }        }
1149    
1150      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1145  for (;; ptr++) Line 1161  for (;; ptr++)
1161      break;      break;
1162    
1163    
1164      /* Start of nested bracket sub-expression, or comment or lookahead.      /* Start of nested bracket sub-expression, or comment or lookahead or
1165      First deal with special things that can come after a bracket; all are      lookbehind or option setting or condition. First deal with special things
1166      introduced by ?, and the appearance of any of them means that this is not a      that can come after a bracket; all are introduced by ?, and the appearance
1167      referencing group. They were checked for validity in the first pass over      of any of them means that this is not a referencing group. They were
1168      the string, so we don't have to check for syntax errors here.  */      checked for validity in the first pass over the string, so we don't have to
1169        check for syntax errors here.  */
1170    
1171      case '(':      case '(':
1172      previous = code;              /* Only real brackets can be repeated */      newoptions = options;
1173        condref = -1;
1174    
1175      if (*(++ptr) == '?')      if (*(++ptr) == '?')
1176        {        {
1177        bravalue = OP_BRA;        int set, unset;
1178          int *optset;
1179    
1180        switch (*(++ptr))        switch (*(++ptr))
1181          {          {
1182          case '#':          case '#':                 /* Comment; skip to ket */
         case 'i':  
         case 'm':  
         case 's':  
         case 'x':  
         case 'U':  
         case 'X':  
1183          ptr++;          ptr++;
1184          while (*ptr != ')') ptr++;          while (*ptr != ')') ptr++;
         previous = NULL;  
1185          continue;          continue;
1186    
1187          case ':':                 /* Non-extracting bracket */          case ':':                 /* Non-extracting bracket */
1188            bravalue = OP_BRA;
1189          ptr++;          ptr++;
1190          break;          break;
1191    
1192          case '=':                 /* Assertions can't be repeated */          case '(':
1193            bravalue = OP_COND;       /* Conditional group */
1194            if ((pcre_ctypes[*(++ptr)] & ctype_digit) != 0)
1195              {
1196              condref = *ptr - '0';
1197              while (*(++ptr) != ')') condref = condref*10 + *ptr - '0';
1198              ptr++;
1199              }
1200            else ptr--;
1201            break;
1202    
1203            case '=':                 /* Positive lookahead */
1204          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
1205          ptr++;          ptr++;
         previous = NULL;  
1206          break;          break;
1207    
1208          case '!':          case '!':                 /* Negative lookahead */
1209          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
1210          ptr++;          ptr++;
         previous = NULL;  
1211          break;          break;
1212    
1213          case '>':                         /* "Match once" brackets */          case '<':                 /* Lookbehinds */
1214          if ((options & PCRE_EXTRA) != 0)  /* Not yet standard */          switch (*(++ptr))
1215            {            {
1216            bravalue = OP_ONCE;            case '=':               /* Positive lookbehind */
1217              bravalue = OP_ASSERTBACK;
1218              ptr++;
1219              break;
1220    
1221              case '!':               /* Negative lookbehind */
1222              bravalue = OP_ASSERTBACK_NOT;
1223            ptr++;            ptr++;
           previous = NULL;  
1224            break;            break;
1225    
1226              default:                /* Syntax error */
1227              *errorptr = ERR24;
1228              goto FAILED;
1229            }            }
1230          /* Else fall through */          break;
1231    
1232          default:          case '>':                 /* One-time brackets */
1233          *errorptr = ERR12;          bravalue = OP_ONCE;
1234          goto FAILED;          ptr++;
1235            break;
1236    
1237            default:                  /* Option setting */
1238            set = unset = 0;
1239            optset = &set;
1240    
1241            while (*ptr != ')' && *ptr != ':')
1242              {
1243              switch (*ptr++)
1244                {
1245                case '-': optset = &unset; break;
1246    
1247                case 'i': *optset |= PCRE_CASELESS; break;
1248                case 'm': *optset |= PCRE_MULTILINE; break;
1249                case 's': *optset |= PCRE_DOTALL; break;
1250                case 'x': *optset |= PCRE_EXTENDED; break;
1251                case 'U': *optset |= PCRE_UNGREEDY; break;
1252                case 'X': *optset |= PCRE_EXTRA; break;
1253    
1254                default:
1255                *errorptr = ERR12;
1256                goto FAILED;
1257                }
1258              }
1259    
1260            /* Set up the changed option bits, but don't change anything yet. */
1261    
1262            newoptions = (options | set) & (~unset);
1263    
1264            /* If the options ended with ')' this is not the start of a nested
1265            group with option changes, so the options change at this level. At top
1266            level there is nothing else to be done (the options will in fact have
1267            been set from the start of compiling as a result of the first pass) but
1268            at an inner level we must compile code to change the ims options if
1269            necessary, and pass the new setting back so that it can be put at the
1270            start of any following branches, and when this group ends, a resetting
1271            item can be compiled. */
1272    
1273            if (*ptr == ')')
1274              {
1275              if ((options & PCRE_INGROUP) != 0 &&
1276                  (options & PCRE_IMS) != (newoptions & PCRE_IMS))
1277                {
1278                *code++ = OP_OPT;
1279                *code++ = *optchanged = newoptions & PCRE_IMS;
1280                }
1281              options = newoptions;  /* Change options at this level */
1282              previous = NULL;       /* This item can't be repeated */
1283              continue;              /* It is complete */
1284              }
1285    
1286            /* If the options ended with ':' we are heading into a nested group
1287            with possible change of options. Such groups are non-capturing and are
1288            not assertions of any kind. All we need to do is skip over the ':';
1289            the newoptions value is handled below. */
1290    
1291            bravalue = OP_BRA;
1292            ptr++;
1293          }          }
1294        }        }
1295    
1296      /* Else we have a referencing group */      /* Else we have a referencing group; adjust the opcode. */
1297    
1298      else      else
1299        {        {
# Line 1215  for (;; ptr++) Line 1305  for (;; ptr++)
1305        bravalue = OP_BRA + *brackets;        bravalue = OP_BRA + *brackets;
1306        }        }
1307    
1308      /* Process nested bracketed re; at end pointer is on the bracket. We copy      /* Process nested bracketed re. Assertions may not be repeated, but other
1309      code into a non-register variable in order to be able to pass its address      kinds can be. We copy code into a non-register variable in order to be able
1310      because some compilers complain otherwise. */      to pass its address because some compilers complain otherwise. Pass in a
1311        new setting for the ims options if they have changed. */
1312    
1313        previous = (bravalue >= OP_ONCE)? code : NULL;
1314      *code = bravalue;      *code = bravalue;
1315        tempcode = code;
1316    
1317        if (!compile_regex(
1318             options | PCRE_INGROUP,       /* Set for all nested groups */
1319             ((options & PCRE_IMS) != (newoptions & PCRE_IMS))?
1320               newoptions & PCRE_IMS : -1, /* Pass ims options if changed */
1321             brackets,                     /* Bracket level */
1322             &tempcode,                    /* Where to put code (updated) */
1323             &ptr,                         /* Input pointer (updated) */
1324             errorptr,                     /* Where to put an error message */
1325             (bravalue == OP_ASSERTBACK ||
1326              bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1327             condref))                     /* Condition reference number */
1328          goto FAILED;
1329    
1330        /* At the end of compiling, code is still pointing to the start of the
1331        group, while tempcode has been updated to point past the end of the group
1332        and any option resetting that may follow it. The pattern pointer (ptr)
1333        is on the bracket. */
1334    
1335        /* If this is a conditional bracket, check that there are no more than
1336        two branches in the group. */
1337    
1338        if (bravalue == OP_COND)
1339        {        {
1340        uschar *mcode = code;        int branchcount = 0;
1341        if (!compile_regex(options, brackets, &mcode, &ptr, errorptr))        uschar *tc = code;
1342    
1343          do {
1344             branchcount++;
1345             tc += (tc[1] << 8) | tc[2];
1346             }
1347          while (*tc != OP_KET);
1348    
1349          if (branchcount > 2)
1350            {
1351            *errorptr = ERR27;
1352          goto FAILED;          goto FAILED;
1353        code = mcode;          }
1354        }        }
1355    
1356        /* Now update the main code pointer to the end of the group. */
1357    
1358        code = tempcode;
1359    
1360        /* Error if hit end of pattern */
1361    
1362      if (*ptr != ')')      if (*ptr != ')')
1363        {        {
1364        *errorptr = ERR14;        *errorptr = ERR14;
# Line 1239  for (;; ptr++) Line 1371  for (;; ptr++)
1371      for validity in the pre-compiling pass. */      for validity in the pre-compiling pass. */
1372    
1373      case '\\':      case '\\':
1374      oldptr = ptr;      tempptr = ptr;
1375      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);      c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1376    
1377      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values      /* Handle metacharacters introduced by \. For ones like \d, the ESC_ values
# Line 1253  for (;; ptr++) Line 1385  for (;; ptr++)
1385        {        {
1386        if (-c >= ESC_REF)        if (-c >= ESC_REF)
1387          {          {
         int refnum = -c - ESC_REF;  
         if (*brackets < refnum)  
           {  
           *errorptr = ERR15;  
           goto FAILED;  
           }  
1388          previous = code;          previous = code;
1389          *code++ = OP_REF;          *code++ = OP_REF;
1390          *code++ = refnum;          *code++ = -c - ESC_REF;
1391          }          }
1392        else        else
1393          {          {
1394          previous = (-c > ESC_b && -c < ESC_X)? code : NULL;          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
1395          *code++ = -c;          *code++ = -c;
1396          }          }
1397        continue;        continue;
# Line 1273  for (;; ptr++) Line 1399  for (;; ptr++)
1399    
1400      /* Data character: reset and fall through */      /* Data character: reset and fall through */
1401    
1402      ptr = oldptr;      ptr = tempptr;
1403      c = '\\';      c = '\\';
1404    
1405      /* Handle a run of data characters until a metacharacter is encountered.      /* Handle a run of data characters until a metacharacter is encountered.
# Line 1306  for (;; ptr++) Line 1432  for (;; ptr++)
1432    
1433        if (c == '\\')        if (c == '\\')
1434          {          {
1435          oldptr = ptr;          tempptr = ptr;
1436          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);          c = check_escape(&ptr, errorptr, *brackets, options, FALSE);
1437          if (c < 0) { ptr = oldptr; break; }          if (c < 0) { ptr = tempptr; break; }
1438          }          }
1439    
1440        /* Ordinary character or single-char escape */        /* Ordinary character or single-char escape */
# Line 1349  return FALSE; Line 1475  return FALSE;
1475  /* On entry, ptr is pointing past the bracket character, but on return  /* On entry, ptr is pointing past the bracket character, but on return
1476  it points to the closing bracket, or vertical bar, or end of string.  it points to the closing bracket, or vertical bar, or end of string.
1477  The code variable is pointing at the byte into which the BRA operator has been  The code variable is pointing at the byte into which the BRA operator has been
1478  stored.  stored. If the ims options are changed at the start (for a (?ims: group) or
1479    during any branch, we need to insert an OP_OPT item at the start of every
1480    following branch to ensure they get set correctly at run time, and also pass
1481    the new options into every subsequent branch compile.
1482    
1483  Argument:  Argument:
1484    options   the option bits    options     the option bits
1485    brackets  -> int containing the number of extracting brackets used    optchanged  new ims options to set as if (?ims) were at the start, or -1
1486    codeptr   -> the address of the current code pointer                 for no change
1487    ptrptr    -> the address of the current pattern pointer    brackets    -> int containing the number of extracting brackets used
1488    errorptr  -> pointer to error message    codeptr     -> the address of the current code pointer
1489      ptrptr      -> the address of the current pattern pointer
1490      errorptr    -> pointer to error message
1491      lookbehind  TRUE if this is a lookbehind assertion
1492      condref     > 0 for OP_CREF setting at start of conditional group
1493    
1494  Returns:    TRUE on success  Returns:      TRUE on success
1495  */  */
1496    
1497  static BOOL  static BOOL
1498  compile_regex(int options, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1499    const uschar **ptrptr, const char **errorptr)    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref)
1500  {  {
1501  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1502  uschar *code = *codeptr;  uschar *code = *codeptr;
1503    uschar *last_branch = code;
1504  uschar *start_bracket = code;  uschar *start_bracket = code;
1505    uschar *reverse_count = NULL;
1506    int oldoptions = options & PCRE_IMS;
1507    
1508    code += 3;
1509    
1510    /* At the start of a reference-based conditional group, insert the reference
1511    number as an OP_CREF item. */
1512    
1513    if (condref > 0)
1514      {
1515      *code++ = OP_CREF;
1516      *code++ = condref;
1517      }
1518    
1519    /* Loop for each alternative branch */
1520    
1521  for (;;)  for (;;)
1522    {    {
1523    int length;    int length;
   uschar *last_branch = code;  
1524    
1525    code += 3;    /* Handle change of options */
1526    if (!compile_branch(options, brackets, &code, &ptr, errorptr))  
1527      if (optchanged >= 0)
1528        {
1529        *code++ = OP_OPT;
1530        *code++ = optchanged;
1531        options = (options & ~PCRE_IMS) | optchanged;
1532        }
1533    
1534      /* Set up dummy OP_REVERSE if lookbehind assertion */
1535    
1536      if (lookbehind)
1537        {
1538        *code++ = OP_REVERSE;
1539        reverse_count = code;
1540        *code++ = 0;
1541        *code++ = 0;
1542        }
1543    
1544      /* Now compile the branch */
1545    
1546      if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged))
1547      {      {
1548      *ptrptr = ptr;      *ptrptr = ptr;
1549      return FALSE;      return FALSE;
# Line 1387  for (;;) Line 1555  for (;;)
1555    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1556    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1557    
1558      /* If lookbehind, check that this branch matches a fixed-length string,
1559      and put the length into the OP_REVERSE item. Temporarily mark the end of
1560      the branch with OP_END. */
1561    
1562      if (lookbehind)
1563        {
1564        *code = OP_END;
1565        length = find_fixedlength(last_branch);
1566        DPRINTF(("fixed length = %d\n", length));
1567        if (length < 0)
1568          {
1569          *errorptr = ERR25;
1570          *ptrptr = ptr;
1571          return FALSE;
1572          }
1573        reverse_count[0] = (length >> 8);
1574        reverse_count[1] = length & 255;
1575        }
1576    
1577    /* Reached end of expression, either ')' or end of pattern. Insert a    /* Reached end of expression, either ')' or end of pattern. Insert a
1578    terminating ket and the length of the whole bracketed item, and return,    terminating ket and the length of the whole bracketed item, and return,
1579    leaving the pointer at the terminating char. */    leaving the pointer at the terminating char. If any of the ims options
1580      were changed inside the group, compile a resetting op-code following. */
1581    
1582    if (*ptr != '|')    if (*ptr != '|')
1583      {      {
# Line 1397  for (;;) Line 1585  for (;;)
1585      *code++ = OP_KET;      *code++ = OP_KET;
1586      *code++ = length >> 8;      *code++ = length >> 8;
1587      *code++ = length & 255;      *code++ = length & 255;
1588        if (optchanged >= 0)
1589          {
1590          *code++ = OP_OPT;
1591          *code++ = oldoptions;
1592          }
1593      *codeptr = code;      *codeptr = code;
1594      *ptrptr = ptr;      *ptrptr = ptr;
1595      return TRUE;      return TRUE;
# Line 1405  for (;;) Line 1598  for (;;)
1598    /* Another branch follows; insert an "or" node and advance the pointer. */    /* Another branch follows; insert an "or" node and advance the pointer. */
1599    
1600    *code = OP_ALT;    *code = OP_ALT;
1601      last_branch = code;
1602      code += 3;
1603    ptr++;    ptr++;
1604    }    }
1605  /* Control never reaches here */  /* Control never reaches here */
# Line 1412  for (;;) Line 1607  for (;;)
1607    
1608    
1609    
1610    
1611    /*************************************************
1612    *      Find first significant op code            *
1613    *************************************************/
1614    
1615    /* This is called by several functions that scan a compiled expression looking
1616    for a fixed first character, or an anchoring op code etc. It skips over things
1617    that do not influence this. For one application, a change of caseless option is
1618    important.
1619    
1620    Arguments:
1621      code       pointer to the start of the group
1622      options    pointer to external options
1623      optbit     the option bit whose changing is significant, or
1624                 zero if none are
1625      optstop    TRUE to return on option change, otherwise change the options
1626                   value and continue
1627    
1628    Returns:     pointer to the first significant opcode
1629    */
1630    
1631    static const uschar*
1632    first_significant_code(const uschar *code, int *options, int optbit,
1633      BOOL optstop)
1634    {
1635    for (;;)
1636      {
1637      switch ((int)*code)
1638        {
1639        case OP_OPT:
1640        if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit))
1641          {
1642          if (optstop) return code;
1643          *options = (int)code[1];
1644          }
1645        code += 2;
1646        break;
1647    
1648        case OP_CREF:
1649        code += 2;
1650        break;
1651    
1652        case OP_ASSERT_NOT:
1653        case OP_ASSERTBACK:
1654        case OP_ASSERTBACK_NOT:
1655        do code += (code[1] << 8) + code[2]; while (*code == OP_ALT);
1656        code += 3;
1657        break;
1658    
1659        default:
1660        return code;
1661        }
1662      }
1663    /* Control never reaches here */
1664    }
1665    
1666    
1667    
1668    
1669  /*************************************************  /*************************************************
1670  *          Check for anchored expression         *  *          Check for anchored expression         *
1671  *************************************************/  *************************************************/
# Line 1426  A branch is also implicitly anchored if Line 1680  A branch is also implicitly anchored if
1680  the rest of the pattern at all possible matching points, so there is no point  the rest of the pattern at all possible matching points, so there is no point
1681  trying them again.  trying them again.
1682    
1683  Argument:  points to start of expression (the bracket)  Arguments:
1684  Returns:   TRUE or FALSE    code       points to start of expression (the bracket)
1685      options    points to the options setting
1686    
1687    Returns:     TRUE or FALSE
1688  */  */
1689    
1690  static BOOL  static BOOL
1691  is_anchored(register const uschar *code, BOOL multiline)  is_anchored(register const uschar *code, int *options)
1692  {  {
1693  do {  do {
1694     int op = (int)code[3];     const uschar *scode = first_significant_code(code + 3, options,
1695     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE)       PCRE_MULTILINE, FALSE);
1696       { if (!is_anchored(code+3, multiline)) return FALSE; }     register int op = *scode;
1697       if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1698         { if (!is_anchored(scode, options)) return FALSE; }
1699     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1700       { if (code[4] != OP_ANY) return FALSE; }       { if (scode[1] != OP_ANY) return FALSE; }
1701     else if (op != OP_SOD && (multiline || op != OP_CIRC)) return FALSE;     else if (op != OP_SOD &&
1702               ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
1703         return FALSE;
1704     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1705     }     }
1706  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1463  static BOOL Line 1724  static BOOL
1724  is_startline(const uschar *code)  is_startline(const uschar *code)
1725  {  {
1726  do {  do {
1727     if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     const uschar *scode = first_significant_code(code + 3, NULL, 0, FALSE);
1728       { if (!is_startline(code+3)) return FALSE; }     register int op = *scode;
1729     else if (code[3] != OP_CIRC) return FALSE;     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1730         { if (!is_startline(scode)) return FALSE; }
1731       else if (op != OP_CIRC) return FALSE;
1732     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1733     }     }
1734  while (*code == OP_ALT);  while (*code == OP_ALT);
# Line 1484  Consider each alternative branch. If the Line 1747  Consider each alternative branch. If the
1747  a bracket all of whose alternatives start with the same char (recurse ad lib),  a bracket all of whose alternatives start with the same char (recurse ad lib),
1748  then we return that char, otherwise -1.  then we return that char, otherwise -1.
1749    
1750  Argument:  points to start of expression (the bracket)  Arguments:
1751  Returns:   -1 or the fixed first char    code       points to start of expression (the bracket)
1752      options    pointer to the options (used to check casing changes)
1753    
1754    Returns:     -1 or the fixed first char
1755  */  */
1756    
1757  static int  static int
1758  find_firstchar(uschar *code)  find_firstchar(const uschar *code, int *options)
1759  {  {
1760  register int c = -1;  register int c = -1;
1761  do  do {
1762    {     int d;
1763    register int charoffset = 4;     const uschar *scode = first_significant_code(code + 3, options,
1764         PCRE_CASELESS, TRUE);
1765    if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT)     register int op = *scode;
1766      {  
1767      register int d;     if (op >= OP_BRA) op = OP_BRA;
1768      if ((d = find_firstchar(code+3)) < 0) return -1;  
1769      if (c < 0) c = d; else if (c != d) return -1;     switch(op)
1770      }       {
1771         default:
1772    else switch(code[3])       return -1;
1773      {  
1774      default:       case OP_BRA:
1775      return -1;       case OP_ASSERT:
1776         case OP_ONCE:
1777      case OP_EXACT:       /* Fall through */       case OP_COND:
1778      charoffset++;       if ((d = find_firstchar(scode, options)) < 0) return -1;
1779         if (c < 0) c = d; else if (c != d) return -1;
1780      case OP_CHARS:       /* Fall through */       break;
1781      charoffset++;  
1782         case OP_EXACT:       /* Fall through */
1783         scode++;
1784    
1785         case OP_CHARS:       /* Fall through */
1786         scode++;
1787    
1788         case OP_PLUS:
1789         case OP_MINPLUS:
1790         if (c < 0) c = scode[1]; else if (c != scode[1]) return -1;
1791         break;
1792         }
1793    
1794      case OP_PLUS:     code += (code[1] << 8) + code[2];
1795      case OP_MINPLUS:     }
     if (c < 0) c = code[charoffset]; else if (c != code[charoffset]) return -1;  
     break;  
     }  
   code += (code[1] << 8) + code[2];  
   }  
1796  while (*code == OP_ALT);  while (*code == OP_ALT);
1797  return c;  return c;
1798  }  }
1799    
1800    
1801    
1802    
1803    
1804  /*************************************************  /*************************************************
1805  *        Compile a Regular Expression            *  *        Compile a Regular Expression            *
1806  *************************************************/  *************************************************/
# Line 1549  pcre_compile(const char *pattern, int op Line 1823  pcre_compile(const char *pattern, int op
1823    int *erroroffset)    int *erroroffset)
1824  {  {
1825  real_pcre *re;  real_pcre *re;
 int spaces = 0;  
1826  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
1827  int runlength;  int runlength;
1828  int c, size;  int c, size;
1829  int bracount = 0;  int bracount = 0;
 int brastack[200];  
1830  int top_backref = 0;  int top_backref = 0;
1831    int branch_extra = 0;
1832    int branch_newextra;
1833  unsigned int brastackptr = 0;  unsigned int brastackptr = 0;
1834  uschar *code;  uschar *code;
1835  const uschar *ptr;  const uschar *ptr;
1836    int brastack[BRASTACK_SIZE];
1837    uschar bralenstack[BRASTACK_SIZE];
1838    
1839  #ifdef DEBUG  #ifdef DEBUG
1840  uschar *code_base, *code_end;  uschar *code_base, *code_end;
# Line 1601  while ((c = *(++ptr)) != 0) Line 1877  while ((c = *(++ptr)) != 0)
1877    int min, max;    int min, max;
1878    int class_charcount;    int class_charcount;
1879    
1880    if ((pcre_ctypes[c] & ctype_space) != 0)    if ((options & PCRE_EXTENDED) != 0)
     {  
     if ((options & PCRE_EXTENDED) != 0) continue;  
     spaces++;  
     }  
   
   if (c == '#' && (options & PCRE_EXTENDED) != 0)  
1881      {      {
1882      while ((c = *(++ptr)) != 0 && c != '\n');      if ((pcre_ctypes[c] & ctype_space) != 0) continue;
1883      continue;      if (c == '#')
1884          {
1885          while ((c = *(++ptr)) != 0 && c != '\n');
1886          continue;
1887          }
1888      }      }
1889    
1890    switch(c)    switch(c)
# Line 1684  while ((c = *(++ptr)) != 0) Line 1958  while ((c = *(++ptr)) != 0)
1958      if (ptr[1] == '?') ptr++;      if (ptr[1] == '?') ptr++;
1959      continue;      continue;
1960    
1961      /* An alternation contains an offset to the next branch or ket. */      /* An alternation contains an offset to the next branch or ket. If any ims
1962        options changed in the previous branch(es), and/or if we are in a
1963        lookbehind assertion, extra space will be needed at the start of the
1964        branch. This is handled by branch_extra. */
1965    
1966      case '|':      case '|':
1967      length += 3;      length += 3 + branch_extra;
1968      continue;      continue;
1969    
1970      /* A character class uses 33 characters. Don't worry about character types      /* A character class uses 33 characters. Don't worry about character types
# Line 1733  while ((c = *(++ptr)) != 0) Line 2011  while ((c = *(++ptr)) != 0)
2011    
2012      /* Brackets may be genuine groups or special things */      /* Brackets may be genuine groups or special things */
2013    
2014      case '(':      case '(':
2015        branch_newextra = 0;
     /* Handle special forms of bracket, which all start (? */  
   
     if (ptr[1] == '?') switch (c = ptr[2])  
       {  
       /* Skip over comments entirely */  
       case '#':  
       ptr += 3;  
       while (*ptr != 0 && *ptr != ')') ptr++;  
       if (*ptr == 0)  
         {  
         *errorptr = ERR18;  
         goto PCRE_ERROR_RETURN;  
         }  
       continue;  
2016    
2017        /* Non-referencing groups and lookaheads just move the pointer on, and      /* Handle special forms of bracket, which all start (? */
       then behave like a non-special bracket, except that they don't increment  
       the count of extracting brackets. */  
   
       case ':':  
       case '=':  
       case '!':  
       ptr += 2;  
       break;  
2018    
2019        /* Ditto for the "once only" bracket, allowed only if the extra bit      if (ptr[1] == '?')
2020        is set. */        {
2021          int set, unset;
2022          int *optset;
2023    
2024        case '>':        switch (c = ptr[2])
       if ((options & PCRE_EXTRA) != 0)  
2025          {          {
2026            /* Skip over comments entirely */
2027            case '#':
2028            ptr += 3;
2029            while (*ptr != 0 && *ptr != ')') ptr++;
2030            if (*ptr == 0)
2031              {
2032              *errorptr = ERR18;
2033              goto PCRE_ERROR_RETURN;
2034              }
2035            continue;
2036    
2037            /* Non-referencing groups and lookaheads just move the pointer on, and
2038            then behave like a non-special bracket, except that they don't increment
2039            the count of extracting brackets. Ditto for the "once only" bracket,
2040            which is in Perl from version 5.005. */
2041    
2042            case ':':
2043            case '=':
2044            case '!':
2045            case '>':
2046          ptr += 2;          ptr += 2;
2047          break;          break;
         }  
       /* Else fall through */  
2048    
2049        /* Else loop setting valid options until ) is met. Anything else is an          /* Lookbehinds are in Perl from version 5.005 */
       error. */  
2050    
2051        default:          case '<':
2052        ptr += 2;          if (ptr[3] == '=' || ptr[3] == '!')
       for (;; ptr++)  
         {  
         if ((c = *ptr) == 'i')  
           {  
           options |= PCRE_CASELESS;  
           continue;  
           }  
         else if ((c = *ptr) == 'm')  
2053            {            {
2054            options |= PCRE_MULTILINE;            ptr += 3;
2055            continue;            branch_newextra = 3;
2056              length += 3;         /* For the first branch */
2057              break;
2058            }            }
2059          else if (c == 's')          *errorptr = ERR24;
2060            {          goto PCRE_ERROR_RETURN;
2061            options |= PCRE_DOTALL;  
2062            continue;          /* Conditionals are in Perl from version 5.005. The bracket must either
2063            be followed by a number (for bracket reference) or by an assertion
2064            group. */
2065    
2066            case '(':
2067            if ((pcre_ctypes[ptr[3]] & ctype_digit) != 0)
2068              {
2069              ptr += 4;
2070              length += 2;
2071              while ((pcre_ctypes[*ptr] & ctype_digit) != 0) ptr++;
2072              if (*ptr != ')')
2073                {
2074                *errorptr = ERR26;
2075                goto PCRE_ERROR_RETURN;
2076                }
2077            }            }
2078          else if (c == 'x')          else   /* An assertion must follow */
2079            {            {
2080            options |= PCRE_EXTENDED;            ptr++;   /* Can treat like ':' as far as spacing is concerned */
2081            length -= spaces;          /* Already counted spaces */  
2082            continue;            if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL)
2083                {
2084                ptr += 2;    /* To get right offset in message */
2085                *errorptr = ERR28;
2086                goto PCRE_ERROR_RETURN;
2087                }
2088            }            }
2089          else if (c == 'X')          break;
2090    
2091            /* Else loop checking valid options until ) is met. Anything else is an
2092            error. If we are without any brackets, i.e. at top level, the settings
2093            act as if specified in the options, so massage the options immediately.
2094            This is for backward compatibility with Perl 5.004. */
2095    
2096            default:
2097            set = unset = 0;
2098            optset = &set;
2099            ptr += 2;
2100    
2101            for (;; ptr++)
2102            {            {
2103            options |= PCRE_EXTRA;            c = *ptr;
2104            continue;            switch (c)
2105                {
2106                case 'i':
2107                *optset |= PCRE_CASELESS;
2108                continue;
2109    
2110                case 'm':
2111                *optset |= PCRE_MULTILINE;
2112                continue;
2113    
2114                case 's':
2115                *optset |= PCRE_DOTALL;
2116                continue;
2117    
2118                case 'x':
2119                *optset |= PCRE_EXTENDED;
2120                continue;
2121    
2122                case 'X':
2123                *optset |= PCRE_EXTRA;
2124                continue;
2125    
2126                case 'U':
2127                *optset |= PCRE_UNGREEDY;
2128                continue;
2129    
2130                case '-':
2131                optset = &unset;
2132                continue;
2133    
2134                /* A termination by ')' indicates an options-setting-only item;
2135                this is global at top level; otherwise nothing is done here and
2136                it is handled during the compiling process on a per-bracket-group
2137                basis. */
2138    
2139                case ')':
2140                if (brastackptr == 0)
2141                  {
2142                  options = (options | set) & (~unset);
2143                  set = unset = 0;     /* To save length */
2144                  }
2145                /* Fall through */
2146    
2147                /* A termination by ':' indicates the start of a nested group with
2148                the given options set. This is again handled at compile time, but
2149                we must allow for compiled space if any of the ims options are
2150                set. We also have to allow for resetting space at the end of
2151                the group, which is why 4 is added to the length and not just 2.
2152                If there are several changes of options within the same group, this
2153                will lead to an over-estimate on the length, but this shouldn't
2154                matter very much. We also have to allow for resetting options at
2155                the start of any alternations, which we do by setting
2156                branch_newextra to 2. */
2157    
2158                case ':':
2159                if (((set|unset) & PCRE_IMS) != 0)
2160                  {
2161                  length += 4;
2162                  branch_newextra = 2;
2163                  }
2164                goto END_OPTIONS;
2165    
2166                /* Unrecognized option character */
2167    
2168                default:
2169                *errorptr = ERR12;
2170                goto PCRE_ERROR_RETURN;
2171                }
2172            }            }
2173          else if (c == 'U')  
2174            /* If we hit a closing bracket, that's it - this is a freestanding
2175            option-setting. We need to ensure that branch_extra is updated if
2176            necessary. The only values branch_newextra can have here are 0 or 2.
2177            If the value is 2, then branch_extra must become either 2 or 5 (it is
2178            raised from 0 or 3), depending on whether this is a lookbehind group. */
2179    
2180            END_OPTIONS:
2181            if (c == ')')
2182            {            {
2183            options |= PCRE_UNGREEDY;            if (branch_newextra == 2 && (branch_extra == 0 || branch_extra == 3))
2184                branch_extra += branch_newextra;
2185            continue;            continue;
2186            }            }
         else if (c == ')') break;  
2187    
2188          *errorptr = ERR12;          /* If options were terminated by ':' control comes here. Fall through
2189          goto PCRE_ERROR_RETURN;          to handle the group below. */
2190          }          }
       continue;                      /* End of this bracket handling */  
2191        }        }
2192    
2193      /* Extracting brackets must be counted so we can process escapes in a      /* Extracting brackets must be counted so we can process escapes in a
# Line 1823  while ((c = *(++ptr)) != 0) Line 2196  while ((c = *(++ptr)) != 0)
2196      else bracount++;      else bracount++;
2197    
2198      /* Non-special forms of bracket. Save length for computing whole length      /* Non-special forms of bracket. Save length for computing whole length
2199      at end if there's a repeat that requires duplication of the group. */      at end if there's a repeat that requires duplication of the group. Also
2200        save the current value of branch_extra, and start the new group with
2201        the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3
2202        for a lookbehind assertion. */
2203    
2204      if (brastackptr >= sizeof(brastack)/sizeof(int))      if (brastackptr >= sizeof(brastack)/sizeof(int))
2205        {        {
# Line 1831  while ((c = *(++ptr)) != 0) Line 2207  while ((c = *(++ptr)) != 0)
2207        goto PCRE_ERROR_RETURN;        goto PCRE_ERROR_RETURN;
2208        }        }
2209    
2210        bralenstack[brastackptr] = branch_extra;
2211        branch_extra = branch_newextra;
2212    
2213      brastack[brastackptr++] = length;      brastack[brastackptr++] = length;
2214      length += 3;      length += 3;
2215      continue;      continue;
# Line 1838  while ((c = *(++ptr)) != 0) Line 2217  while ((c = *(++ptr)) != 0)
2217      /* Handle ket. Look for subsequent max/min; for certain sets of values we      /* Handle ket. Look for subsequent max/min; for certain sets of values we
2218      have to replicate this bracket up to that many times. If brastackptr is      have to replicate this bracket up to that many times. If brastackptr is
2219      0 this is an unmatched bracket which will generate an error, but take care      0 this is an unmatched bracket which will generate an error, but take care
2220      not to try to access brastack[-1]. */      not to try to access brastack[-1] when computing the length and restoring
2221        the branch_extra value. */
2222    
2223      case ')':      case ')':
2224      length += 3;      length += 3;
2225        {        {
2226        int minval = 1;        int minval = 1;
2227        int maxval = 1;        int maxval = 1;
2228        int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0;        int duplength;
2229    
2230          if (brastackptr > 0)
2231            {
2232            duplength = length - brastack[--brastackptr];
2233            branch_extra = bralenstack[brastackptr];
2234            }
2235          else duplength = 0;
2236    
2237        /* Leave ptr at the final char; for read_repeat_counts this happens        /* Leave ptr at the final char; for read_repeat_counts this happens
2238        automatically; for the others we need an increment. */        automatically; for the others we need an increment. */
# Line 1881  while ((c = *(++ptr)) != 0) Line 2268  while ((c = *(++ptr)) != 0)
2268      runlength = 0;      runlength = 0;
2269      do      do
2270        {        {
2271        if ((pcre_ctypes[c] & ctype_space) != 0)        if ((options & PCRE_EXTENDED) != 0)
         {  
         if ((options & PCRE_EXTENDED) != 0) continue;  
         spaces++;  
         }  
   
       if (c == '#' && (options & PCRE_EXTENDED) != 0)  
2272          {          {
2273          while ((c = *(++ptr)) != 0 && c != '\n');          if ((pcre_ctypes[c] & ctype_space) != 0) continue;
2274          continue;          if (c == '#')
2275              {
2276              while ((c = *(++ptr)) != 0 && c != '\n');
2277              continue;
2278              }
2279          }          }
2280    
2281        /* Backslash may introduce a data char or a metacharacter; stop the        /* Backslash may introduce a data char or a metacharacter; stop the
# Line 1955  ptr = (const uschar *)pattern; Line 2340  ptr = (const uschar *)pattern;
2340  code = re->code;  code = re->code;
2341  *code = OP_BRA;  *code = OP_BRA;
2342  bracount = 0;  bracount = 0;
2343  (void)compile_regex(options, &bracount, &code, &ptr, errorptr);  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1);
2344  re->top_bracket = bracount;  re->top_bracket = bracount;
2345  re->top_backref = top_backref;  re->top_backref = top_backref;
2346    
# Line 1972  if debugging, leave the test till after Line 2357  if debugging, leave the test till after
2357  if (code - re->code > length) *errorptr = ERR23;  if (code - re->code > length) *errorptr = ERR23;
2358  #endif  #endif
2359    
2360    /* Give an error if there's a back reference to a non-existent capturing
2361    subpattern. */
2362    
2363    if (top_backref > re->top_bracket) *errorptr = ERR15;
2364    
2365  /* Failed to compile */  /* Failed to compile */
2366    
2367  if (*errorptr != NULL)  if (*errorptr != NULL)
# Line 1990  to set the PCRE_STARTLINE flag if all br Line 2380  to set the PCRE_STARTLINE flag if all br
2380    
2381  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2382    {    {
2383    if (is_anchored(re->code, (options & PCRE_MULTILINE) != 0))    int temp_options = options;
2384      if (is_anchored(re->code, &temp_options))
2385      re->options |= PCRE_ANCHORED;      re->options |= PCRE_ANCHORED;
2386    else    else
2387      {      {
2388      int ch = find_firstchar(re->code);      int ch = find_firstchar(re->code, &temp_options);
2389      if (ch >= 0)      if (ch >= 0)
2390        {        {
2391        re->first_char = ch;        re->first_char = ch;
# Line 2009  if ((options & PCRE_ANCHORED) == 0) Line 2400  if ((options & PCRE_ANCHORED) == 0)
2400    
2401  #ifdef DEBUG  #ifdef DEBUG
2402    
2403  printf("Length = %d top_bracket = %d top_backref=%d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
2404    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
2405    
2406  if (re->options != 0)  if (re->options != 0)
# Line 2048  while (code < code_end) Line 2439  while (code < code_end)
2439    
2440    else switch(*code)    else switch(*code)
2441      {      {
2442        case OP_OPT:
2443        printf(" %.2x %s", code[1], OP_names[*code]);
2444        code++;
2445        break;
2446    
2447        case OP_COND:
2448        printf("%3d Cond", (code[1] << 8) + code[2]);
2449        code += 2;
2450        break;
2451    
2452        case OP_CREF:
2453        printf(" %.2d %s", code[1], OP_names[*code]);
2454        code++;
2455        break;
2456    
2457      case OP_CHARS:      case OP_CHARS:
2458      charlength = *(++code);      charlength = *(++code);
2459      printf("%3d ", charlength);      printf("%3d ", charlength);
# Line 2061  while (code < code_end) Line 2467  while (code < code_end)
2467      case OP_KET:      case OP_KET:
2468      case OP_ASSERT:      case OP_ASSERT:
2469      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2470        case OP_ASSERTBACK:
2471        case OP_ASSERTBACK_NOT:
2472      case OP_ONCE:      case OP_ONCE:
2473      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);      printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2474      code += 2;      code += 2;
2475      break;      break;
2476    
2477        case OP_REVERSE:
2478        printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]);
2479        code += 2;
2480        break;
2481    
2482      case OP_STAR:      case OP_STAR:
2483      case OP_MINSTAR:      case OP_MINSTAR:
2484      case OP_PLUS:      case OP_PLUS:
# Line 2139  while (code < code_end) Line 2552  while (code < code_end)
2552      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
2553    
2554      case OP_CLASS:      case OP_CLASS:
     case OP_NEGCLASS:  
2555        {        {
2556        int i, min, max;        int i, min, max;
2557          code++;
2558        if (*code++ == OP_CLASS) printf("    [");        printf("    [");
         else printf("   ^[");  
2559    
2560        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2561          {          {
# Line 2269  return FALSE; Line 2680  return FALSE;
2680  *          Match a back-reference                *  *          Match a back-reference                *
2681  *************************************************/  *************************************************/
2682    
2683  /* If a back reference hasn't been set, the match fails.  /* If a back reference hasn't been set, the length that is passed is greater
2684    than the number of characters left in the string, so the match fails.
2685    
2686  Arguments:  Arguments:
2687    number      reference number    offset      index into the offset vector
2688    eptr        points into the subject    eptr        points into the subject
2689    length      length to be matched    length      length to be matched
2690    md          points to match data block    md          points to match data block
2691      ims         the ims flags
2692    
2693  Returns:      TRUE if matched  Returns:      TRUE if matched
2694  */  */
2695    
2696  static BOOL  static BOOL
2697  match_ref(int number, register const uschar *eptr, int length, match_data *md)  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2698      int ims)
2699  {  {
2700  const uschar *p = md->start_subject + md->offset_vector[number];  const uschar *p = md->start_subject + md->offset_vector[offset];
2701    
2702  #ifdef DEBUG  #ifdef DEBUG
2703  if (eptr >= md->end_subject)  if (eptr >= md->end_subject)
# Line 2300  printf("\n"); Line 2714  printf("\n");
2714    
2715  /* Always fail if not enough characters left */  /* Always fail if not enough characters left */
2716    
2717  if (length > md->end_subject - p) return FALSE;  if (length > md->end_subject - eptr) return FALSE;
2718    
2719  /* Separate the caseless case for speed */  /* Separate the caseless case for speed */
2720    
2721  if (md->caseless)  if ((ims & PCRE_CASELESS) != 0)
2722    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }    { while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; }
2723  else  else
2724    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }    { while (length-- > 0) if (*p++ != *eptr++) return FALSE; }
# Line 2318  return TRUE; Line 2732  return TRUE;
2732  *         Match from current position            *  *         Match from current position            *
2733  *************************************************/  *************************************************/
2734    
2735  /* On entry ecode points to the first opcode, and eptr to the first character.  /* On entry ecode points to the first opcode, and eptr to the first character
2736    in the subject string, while eptrb holds the value of eptr at the start of the
2737    last bracketed group - used for breaking infinite loops matching zero-length
2738    strings.
2739    
2740  Arguments:  Arguments:
2741     eptr        pointer in subject     eptr        pointer in subject
2742     ecode       position in code     ecode       position in code
2743     offset_top  current top pointer     offset_top  current top pointer
2744     md          pointer to "static" info for the match     md          pointer to "static" info for the match
2745       ims         current /i, /m, and /s options
2746       condassert  TRUE if called to check a condition assertion
2747       eptrb       eptr at start of last bracket
2748    
2749  Returns:       TRUE if matched  Returns:       TRUE if matched
2750  */  */
2751    
2752  static BOOL  static BOOL
2753  match(register const uschar *eptr, register const uschar *ecode, int offset_top,  match(register const uschar *eptr, register const uschar *ecode,
2754    match_data *md)    int offset_top, match_data *md, int ims, BOOL condassert, const uschar *eptrb)
2755  {  {
2756    int original_ims = ims;   /* Save for resetting on ')' */
2757    
2758  for (;;)  for (;;)
2759    {    {
2760      int op = (int)*ecode;
2761    int min, max, ctype;    int min, max, ctype;
2762    register int i;    register int i;
2763    register int c;    register int c;
2764    BOOL minimize = FALSE;    BOOL minimize = FALSE;
2765    
2766    /* Opening bracket. Check the alternative branches in turn, failing if none    /* Opening capturing bracket. If there is space in the offset vector, save
2767    match. We have to set the start offset if required and there is space    the current subject position in the working slot at the top of the vector. We
2768    in the offset vector so that it is available for subsequent back references    mustn't change the current values of the data slot, because they may be set
2769    if the bracket matches. However, if the bracket fails, we must put back the    from a previous iteration of this group, and be referred to by a reference
2770    previous value of both offsets in case they were set by a previous copy of    inside the group.
   the same bracket. Don't worry about setting the flag for the error case here;  
   that is handled in the code for KET. */  
2771    
2772    if ((int)*ecode >= OP_BRA)    If the bracket fails to match, we need to restore this value and also the
2773      values of the final offsets, in case they were set by a previous iteration of
2774      the same bracket.
2775    
2776      If there isn't enough space in the offset vector, treat this as if it were a
2777      non-capturing bracket. Don't worry about setting the flag for the error case
2778      here; that is handled in the code for KET. */
2779    
2780      if (op > OP_BRA)
2781      {      {
2782      int number = (*ecode - OP_BRA) << 1;      int number = op - OP_BRA;
2783      int save_offset1 = 0, save_offset2 = 0;      int offset = number << 1;
2784    
2785      DPRINTF(("start bracket %d\n", number/2));      DPRINTF(("start bracket %d\n", number));
2786    
2787      if (number > 0 && number < md->offset_end)      if (offset < md->offset_max)
2788        {        {
2789        save_offset1 = md->offset_vector[number];        int save_offset1 = md->offset_vector[offset];
2790        save_offset2 = md->offset_vector[number+1];        int save_offset2 = md->offset_vector[offset+1];
2791        md->offset_vector[number] = eptr - md->start_subject;        int save_offset3 = md->offset_vector[md->offset_end - number];
2792    
2793          DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3));
2794          md->offset_vector[md->offset_end - number] = eptr - md->start_subject;
2795    
2796          do
2797            {
2798            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2799            ecode += (ecode[1] << 8) + ecode[2];
2800            }
2801          while (*ecode == OP_ALT);
2802    
2803        DPRINTF(("saving %d %d\n", save_offset1, save_offset2));        DPRINTF(("bracket %d failed\n", number));
2804    
2805          md->offset_vector[offset] = save_offset1;
2806          md->offset_vector[offset+1] = save_offset2;
2807          md->offset_vector[md->offset_end - number] = save_offset3;
2808          return FALSE;
2809        }        }
2810    
2811      /* Recurse for all the alternatives. */      /* Insufficient room for saving captured contents */
2812    
2813        else op = OP_BRA;
2814        }
2815    
2816      /* Other types of node can be handled by a switch */
2817    
2818      switch(op)
2819        {
2820        case OP_BRA:     /* Non-capturing bracket: optimized */
2821        DPRINTF(("start bracket 0\n"));
2822      do      do
2823        {        {
2824        if (match(eptr, ecode+3, offset_top, md)) return TRUE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2825        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2826        }        }
2827      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2828        DPRINTF(("bracket 0 failed\n"));
2829        return FALSE;
2830    
2831        /* Conditional group: compilation checked that there are no more than
2832        two branches. If the condition is false, skipping the first branch takes us
2833        past the end if there is only one branch, but that's OK because that is
2834        exactly what going to the ket would do. */
2835    
2836        case OP_COND:
2837        if (ecode[3] == OP_CREF)         /* Condition is extraction test */
2838          {
2839          int offset = ecode[4] << 1;    /* Doubled reference number */
2840          return match(eptr,
2841            ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)?
2842              5 : 3 + (ecode[1] << 8) + ecode[2]),
2843            offset_top, md, ims, FALSE, eptr);
2844          }
2845    
2846      DPRINTF(("bracket %d failed\n", number/2));      /* The condition is an assertion. Call match() to evaluate it - setting
2847        the condassert argument TRUE causes it to stop at the end of an assertion. */
2848    
2849      if (number > 0 && number < md->offset_end)      else
2850        {        {
2851        md->offset_vector[number] = save_offset1;        if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL))
2852        md->offset_vector[number+1] = save_offset2;          {
2853            ecode += 3 + (ecode[4] << 8) + ecode[5];
2854            while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2];
2855            }
2856          else ecode += (ecode[1] << 8) + ecode[2];
2857          return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr);
2858        }        }
2859        /* Control never reaches here */
2860    
2861      return FALSE;      /* Skip over conditional reference data if encountered (should not be) */
     }  
2862    
2863    /* Other types of node can be handled by a switch */      case OP_CREF:
2864        ecode += 2;
2865        break;
2866    
2867        /* End of the pattern */
2868    
   switch(*ecode)  
     {  
2869      case OP_END:      case OP_END:
2870      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
2871      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
2872      return TRUE;      return TRUE;
2873    
2874      /* The equivalent of Prolog's "cut" - if the rest doesn't match, the      /* Change option settings */
     whole thing doesn't match, so we have to get out via a longjmp(). */  
2875    
2876      case OP_CUT:      case OP_OPT:
2877      if (match(eptr, ecode+1, offset_top, md)) return TRUE;      ims = ecode[1];
2878      longjmp(md->fail_env, 1);      ecode += 2;
2879        DPRINTF(("ims set to %02x\n", ims));
2880        break;
2881    
2882      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
2883      matching won't pass the KET for an assertion. If any one branch matches,      matching won't pass the KET for an assertion. If any one branch matches,
2884      the assertion is true. */      the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
2885        start of each branch to move the current point backwards, so the code at
2886        this level is identical to the lookahead case. */
2887    
2888      case OP_ASSERT:      case OP_ASSERT:
2889        case OP_ASSERTBACK:
2890      do      do
2891        {        {
2892        if (match(eptr, ecode+3, offset_top, md)) break;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break;
2893        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2894        }        }
2895      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2896      if (*ecode == OP_KET) return FALSE;      if (*ecode == OP_KET) return FALSE;
2897    
2898        /* If checking an assertion for a condition, return TRUE. */
2899    
2900        if (condassert) return TRUE;
2901    
2902      /* Continue from after the assertion, updating the offsets high water      /* Continue from after the assertion, updating the offsets high water
2903      mark, since extracts may have been taken during the assertion. */      mark, since extracts may have been taken during the assertion. */
2904    
# Line 2424  for (;;) Line 2910  for (;;)
2910      /* Negative assertion: all branches must fail to match */      /* Negative assertion: all branches must fail to match */
2911    
2912      case OP_ASSERT_NOT:      case OP_ASSERT_NOT:
2913        case OP_ASSERTBACK_NOT:
2914      do      do
2915        {        {
2916        if (match(eptr, ecode+3, offset_top, md)) return FALSE;        if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE;
2917        ecode += (ecode[1] << 8) + ecode[2];        ecode += (ecode[1] << 8) + ecode[2];
2918        }        }
2919      while (*ecode == OP_ALT);      while (*ecode == OP_ALT);
2920    
2921        if (condassert) return TRUE;
2922      ecode += 3;      ecode += 3;
2923      continue;      continue;
2924    
2925        /* Move the subject pointer back. This occurs only at the start of
2926        each branch of a lookbehind assertion. If we are too close to the start to
2927        move back, this match function fails. */
2928    
2929        case OP_REVERSE:
2930        eptr -= (ecode[1] << 8) + ecode[2];
2931        if (eptr < md->start_subject) return FALSE;
2932        ecode += 3;
2933        break;
2934    
2935    
2936      /* "Once" brackets are like assertion brackets except that after a match,      /* "Once" brackets are like assertion brackets except that after a match,
2937      the point in the subject string is not moved back. Thus there can never be      the point in the subject string is not moved back. Thus there can never be
2938      a move back into the brackets. Check the alternative branches in turn - the      a move back into the brackets. Check the alternative branches in turn - the
2939      matching won't pass the KET for this kind of subpattern. If any one branch      matching won't pass the KET for this kind of subpattern. If any one branch
2940      matches, we carry on, leaving the subject pointer. */      matches, we carry on as at the end of a normal bracket, leaving the subject
2941        pointer. */
2942    
2943      case OP_ONCE:      case OP_ONCE:
     do  
2944        {        {
2945        if (match(eptr, ecode+3, offset_top, md)) break;        const uschar *prev = ecode;
2946        ecode += (ecode[1] << 8) + ecode[2];  
2947        }        do
2948      while (*ecode == OP_ALT);          {
2949      if (*ecode == OP_KET) return FALSE;          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break;
2950            ecode += (ecode[1] << 8) + ecode[2];
2951            }
2952          while (*ecode == OP_ALT);
2953    
2954      /* Continue as from after the assertion, updating the offsets high water        /* If we hit the end of the group (which could be repeated), fail */
     mark, since extracts may have been taken. */  
2955    
2956      do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);        if (*ecode != OP_ONCE && *ecode != OP_ALT) return FALSE;
2957      ecode += 3;  
2958      offset_top = md->end_offset_top;        /* Continue as from after the assertion, updating the offsets high water
2959      eptr = md->end_match_ptr;        mark, since extracts may have been taken. */
2960      continue;  
2961          do ecode += (ecode[1] << 8) + ecode[2]; while (*ecode == OP_ALT);
2962    
2963          offset_top = md->end_offset_top;
2964          eptr = md->end_match_ptr;
2965    
2966          /* For a non-repeating ket, just continue at this level. This also
2967          happens for a repeating ket if no characters were matched in the group.
2968          This is the forcible breaking of infinite loops as implemented in Perl
2969          5.005. If there is an options reset, it will get obeyed in the normal
2970          course of events. */
2971    
2972          if (*ecode == OP_KET || eptr == eptrb)
2973            {
2974            ecode += 3;
2975            break;
2976            }
2977    
2978          /* The repeating kets try the rest of the pattern or restart from the
2979          preceding bracket, in the appropriate order. We need to reset any options
2980          that changed within the bracket before re-running it, so check the next
2981          opcode. */
2982    
2983          if (ecode[3] == OP_OPT)
2984            {
2985            ims = (ims & ~PCRE_IMS) | ecode[4];
2986            DPRINTF(("ims set to %02x at group repeat\n", ims));
2987            }
2988    
2989          if (*ecode == OP_KETRMIN)
2990            {
2991            if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
2992                match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
2993            }
2994          else  /* OP_KETRMAX */
2995            {
2996            if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
2997                match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
2998            }
2999          }
3000        return FALSE;
3001    
3002      /* An alternation is the end of a branch; scan along to find the end of the      /* An alternation is the end of a branch; scan along to find the end of the
3003      bracketed group and go to there. */      bracketed group and go to there. */
# Line 2473  for (;;) Line 3015  for (;;)
3015      case OP_BRAZERO:      case OP_BRAZERO:
3016        {        {
3017        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3018        if (match(eptr, next, offset_top, md)) return TRUE;        if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE;
3019        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3020        ecode = next + 3;        ecode = next + 3;
3021        }        }
# Line 2483  for (;;) Line 3025  for (;;)
3025        {        {
3026        const uschar *next = ecode+1;        const uschar *next = ecode+1;
3027        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);        do next += (next[1] << 8) + next[2]; while (*next == OP_ALT);
3028        if (match(eptr, next+3, offset_top, md)) return TRUE;        if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3029        ecode++;        ecode++;
3030        }        }
3031      break;;      break;
3032    
3033      /* End of a group, repeated or non-repeating. If we are at the end of      /* End of a group, repeated or non-repeating. If we are at the end of
3034      an assertion "group", stop matching and return TRUE, but record the      an assertion "group", stop matching and return TRUE, but record the
3035      current high water mark for use by positive assertions. */      current high water mark for use by positive assertions. Do this also
3036        for the "once" (non-backing-up) groups. */
3037    
3038      case OP_KET:      case OP_KET:
3039      case OP_KETRMIN:      case OP_KETRMIN:
3040      case OP_KETRMAX:      case OP_KETRMAX:
3041        {        {
       int number;  
3042        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];        const uschar *prev = ecode - (ecode[1] << 8) - ecode[2];
3043    
3044        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE)        if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT ||
3045              *prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT ||
3046              *prev == OP_ONCE)
3047          {          {
3048          md->end_match_ptr = eptr;      /* For ONCE */          md->end_match_ptr = eptr;      /* For ONCE */
3049          md->end_offset_top = offset_top;          md->end_offset_top = offset_top;
3050          return TRUE;          return TRUE;
3051          }          }
3052    
3053        /* In all other cases we have to check the group number back at the        /* In all other cases except a conditional group we have to check the
3054        start and if necessary complete handling an extraction by setting the        group number back at the start and if necessary complete handling an
3055        final offset and bumping the high water mark. */        extraction by setting the offsets and bumping the high water mark. */
3056    
3057        number = (*prev - OP_BRA) << 1;        if (*prev != OP_COND)
3058            {
3059            int number = *prev - OP_BRA;
3060            int offset = number << 1;
3061    
3062        DPRINTF(("end bracket %d\n", number/2));          DPRINTF(("end bracket %d\n", number));
3063    
3064        if (number > 0)          if (number > 0)
         {  
         if (number >= md->offset_end) md->offset_overflow = TRUE; else  
3065            {            {
3066            md->offset_vector[number+1] = eptr - md->start_subject;            if (offset >= md->offset_max) md->offset_overflow = TRUE; else
3067            if (offset_top <= number) offset_top = number + 2;              {
3068                md->offset_vector[offset] =
3069                  md->offset_vector[md->offset_end - number];
3070                md->offset_vector[offset+1] = eptr - md->start_subject;
3071                if (offset_top <= offset) offset_top = offset + 2;
3072                }
3073            }            }
3074          }          }
3075    
3076        /* For a non-repeating ket, just advance to the next node and continue at        /* Reset the value of the ims flags, in case they got changed during
3077        this level. */        the group. */
3078    
3079          ims = original_ims;
3080          DPRINTF(("ims reset to %02x\n", ims));
3081    
3082        if (*ecode == OP_KET)        /* For a non-repeating ket, just continue at this level. This also
3083          happens for a repeating ket if no characters were matched in the group.
3084          This is the forcible breaking of infinite loops as implemented in Perl
3085          5.005. If there is an options reset, it will get obeyed in the normal
3086          course of events. */
3087    
3088          if (*ecode == OP_KET || eptr == eptrb)
3089          {          {
3090          ecode += 3;          ecode += 3;
3091          break;          break;
# Line 2537  for (;;) Line 3096  for (;;)
3096    
3097        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
3098          {          {
3099          if (match(eptr, ecode+3, offset_top, md) ||          if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) ||
3100              match(eptr, prev, offset_top, md)) return TRUE;              match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE;
3101          }          }
3102        else  /* OP_KETRMAX */        else  /* OP_KETRMAX */
3103          {          {
3104          if (match(eptr, prev, offset_top, md) ||          if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) ||
3105              match(eptr, ecode+3, offset_top, md)) return TRUE;              match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE;
3106          }          }
3107        }        }
3108      return FALSE;      return FALSE;
# Line 2552  for (;;) Line 3111  for (;;)
3111    
3112      case OP_CIRC:      case OP_CIRC:
3113      if (md->notbol && eptr == md->start_subject) return FALSE;      if (md->notbol && eptr == md->start_subject) return FALSE;
3114      if (md->multiline)      if ((ims & PCRE_MULTILINE) != 0)
3115        {        {
3116        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;        if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE;
3117        ecode++;        ecode++;
# Line 2567  for (;;) Line 3126  for (;;)
3126      ecode++;      ecode++;
3127      break;      break;
3128    
3129      /* Assert before internal newline if multiline, or before      /* Assert before internal newline if multiline, or before a terminating
3130      a terminating newline unless endonly is set, else end of subject unless      newline unless endonly is set, else end of subject unless noteol is set. */
     noteol is set. */  
3131    
3132      case OP_DOLL:      case OP_DOLL:
3133      if (md->noteol && eptr >= md->end_subject) return FALSE;      if ((ims & PCRE_MULTILINE) != 0)
     if (md->multiline)  
3134        {        {
3135        if (eptr < md->end_subject && *eptr != '\n') return FALSE;        if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; }
3136            else { if (md->noteol) return FALSE; }
3137        ecode++;        ecode++;
3138        break;        break;
3139        }        }
3140      else if (!md->endonly)      else
3141        {        {
3142        if (eptr < md->end_subject - 1 ||        if (md->noteol) return FALSE;
3143           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;        if (!md->endonly)
3144        ecode++;          {
3145        break;          if (eptr < md->end_subject - 1 ||
3146               (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3147    
3148            ecode++;
3149            break;
3150            }
3151        }        }
3152      /* ... else fall through */      /* ... else fall through */
3153    
3154      /* End of subject assertion */      /* End of subject assertion (\z) */
3155    
3156      case OP_EOD:      case OP_EOD:
3157      if (eptr < md->end_subject) return FALSE;      if (eptr < md->end_subject) return FALSE;
3158      ecode++;      ecode++;
3159      break;      break;
3160    
3161        /* End of subject or ending \n assertion (\Z) */
3162    
3163        case OP_EODN:
3164        if (eptr < md->end_subject - 1 ||
3165           (eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE;
3166        ecode++;
3167        break;
3168    
3169      /* Word boundary assertions */      /* Word boundary assertions */
3170    
3171      case OP_NOT_WORD_BOUNDARY:      case OP_NOT_WORD_BOUNDARY:
# Line 2613  for (;;) Line 3184  for (;;)
3184      /* Match a single character type; inline for speed */      /* Match a single character type; inline for speed */
3185    
3186      case OP_ANY:      case OP_ANY:
3187      if (!md->dotall && eptr < md->end_subject && *eptr == '\n') return FALSE;      if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n')
3188          return FALSE;
3189      if (eptr++ >= md->end_subject) return FALSE;      if (eptr++ >= md->end_subject) return FALSE;
3190      ecode++;      ecode++;
3191      break;      break;
# Line 2665  for (;;) Line 3237  for (;;)
3237      case OP_REF:      case OP_REF:
3238        {        {
3239        int length;        int length;
3240        int number = ecode[1] << 1;                /* Doubled reference number */        int offset = ecode[1] << 1;                /* Doubled reference number */
3241        ecode += 2;                                /* Advance past the item */        ecode += 2;                                /* Advance past the item */
3242    
3243        if (number >= offset_top || md->offset_vector[number] < 0)        /* If the reference is unset, set the length to be longer than the amount
3244          {        of subject left; this ensures that every attempt at a match fails. We
3245          md->errorcode = PCRE_ERROR_BADREF;        can't just fail here, because of the possibility of quantifiers with zero
3246          return FALSE;        minima. */
3247          }  
3248          length = (offset >= offset_top || md->offset_vector[offset] < 0)?
3249            md->end_subject - eptr + 1 :
3250            md->offset_vector[offset+1] - md->offset_vector[offset];
3251    
3252        length = md->offset_vector[number+1] - md->offset_vector[number];        /* Set up for repetition, or handle the non-repeated case */
3253    
3254        switch (*ecode)        switch (*ecode)
3255          {          {
# Line 2701  for (;;) Line 3276  for (;;)
3276          break;          break;
3277    
3278          default:               /* No repeat follows */          default:               /* No repeat follows */
3279          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3280          eptr += length;          eptr += length;
3281          continue;              /* With the main loop */          continue;              /* With the main loop */
3282          }          }
# Line 2717  for (;;) Line 3292  for (;;)
3292    
3293        for (i = 1; i <= min; i++)        for (i = 1; i <= min; i++)
3294          {          {
3295          if (!match_ref(number, eptr, length, md)) return FALSE;          if (!match_ref(offset, eptr, length, md, ims)) return FALSE;
3296          eptr += length;          eptr += length;
3297          }          }
3298    
# Line 2732  for (;;) Line 3307  for (;;)
3307          {          {
3308          for (i = min;; i++)          for (i = min;; i++)
3309            {            {
3310            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3311            if (i >= max || !match_ref(number, eptr, length, md))              return TRUE;
3312              if (i >= max || !match_ref(offset, eptr, length, md, ims))
3313              return FALSE;              return FALSE;
3314            eptr += length;            eptr += length;
3315            }            }
# Line 2747  for (;;) Line 3323  for (;;)
3323          const uschar *pp = eptr;          const uschar *pp = eptr;
3324          for (i = min; i < max; i++)          for (i = min; i < max; i++)
3325            {            {
3326            if (!match_ref(number, eptr, length, md)) break;            if (!match_ref(offset, eptr, length, md, ims)) break;
3327            eptr += length;            eptr += length;
3328            }            }
3329          while (eptr >= pp)          while (eptr >= pp)
3330            {            {
3331            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3332                return TRUE;
3333            eptr -= length;            eptr -= length;
3334            }            }
3335          return FALSE;          return FALSE;
# Line 2760  for (;;) Line 3337  for (;;)
3337        }        }
3338      /* Control never gets here */      /* Control never gets here */
3339    
3340    
3341    
3342      /* Match a character class, possibly repeatedly. Look past the end of the      /* Match a character class, possibly repeatedly. Look past the end of the
3343      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
3344      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. */
     matching was set at runtime but not at compile time, we have to check both  
     versions of a character, and we have to behave differently for positive and  
     negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are  
     treated differently. */  
3345    
3346      case OP_CLASS:      case OP_CLASS:
     case OP_NEGCLASS:  
3347        {        {
       BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;  
3348        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
3349        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
3350    
# Line 2810  for (;;) Line 3383  for (;;)
3383          {          {
3384          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
3385          c = *eptr++;          c = *eptr++;
3386            if ((data[c/8] & (1 << (c&7))) != 0) continue;
         /* Either not runtime caseless, or it was a positive class. For  
         runtime caseless, continue if either case is in the map. */  
   
         if (!nasty_case)  
           {  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           if (md->runtime_caseless)  
             {  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
           }  
   
         /* Runtime caseless and it was a negative class. Continue only if  
         both cases are in the map. */  
   
         else  
           {  
           if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  
           }  
   
3387          return FALSE;          return FALSE;
3388          }          }
3389    
# Line 2849  for (;;) Line 3399  for (;;)
3399          {          {
3400          for (i = min;; i++)          for (i = min;; i++)
3401            {            {
3402            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3403                return TRUE;
3404            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
3405            c = *eptr++;            c = *eptr++;
3406              if ((data[c/8] & (1 << (c&7))) != 0) continue;
           /* Either not runtime caseless, or it was a positive class. For  
           runtime caseless, continue if either case is in the map. */  
   
           if (!nasty_case)  
             {  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             if (md->runtime_caseless)  
               {  
               c = pcre_fcc[c];  
               if ((data[c/8] & (1 << (c&7))) != 0) continue;  
               }  
             }  
   
           /* Runtime caseless and it was a negative class. Continue only if  
           both cases are in the map. */  
   
           else  
             {  
             if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
   
3407            return FALSE;            return FALSE;
3408            }            }
3409          /* Control never gets here */          /* Control never gets here */
# Line 2890  for (;;) Line 3418  for (;;)
3418            {            {
3419            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
3420            c = *eptr;            c = *eptr;
3421              if ((data[c/8] & (1 << (c&7))) != 0) continue;
           /* Either not runtime caseless, or it was a positive class. For  
           runtime caseless, continue if either case is in the map. */  
   
           if (!nasty_case)  
             {  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             if (md->runtime_caseless)  
               {  
               c = pcre_fcc[c];  
               if ((data[c/8] & (1 << (c&7))) != 0) continue;  
               }  
             }  
   
           /* Runtime caseless and it was a negative class. Continue only if  
           both cases are in the map. */  
   
           else  
             {  
             if ((data[c/8] & (1 << (c&7))) == 0) break;  
             c = pcre_fcc[c];  
             if ((data[c/8] & (1 << (c&7))) != 0) continue;  
             }  
   
3422            break;            break;
3423            }            }
3424    
3425          while (eptr >= pp)          while (eptr >= pp)
3426            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3427                return TRUE;
3428          return FALSE;          return FALSE;
3429          }          }
3430        }        }
# Line 2945  for (;;) Line 3451  for (;;)
3451  #endif  #endif
3452    
3453        if (length > md->end_subject - eptr) return FALSE;        if (length > md->end_subject - eptr) return FALSE;
3454        if (md->caseless)        if ((ims & PCRE_CASELESS) != 0)
3455          {          {
3456          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;          while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE;
3457          }          }
# Line 3002  for (;;) Line 3508  for (;;)
3508      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max,
3509        max, eptr));        max, eptr));
3510    
3511      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3512        {        {
3513        c = pcre_lcc[c];        c = pcre_lcc[c];
3514        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE;
# Line 3011  for (;;) Line 3517  for (;;)
3517          {          {
3518          for (i = min;; i++)          for (i = min;; i++)
3519            {            {
3520            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3521                return TRUE;
3522            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])            if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++])
3523              return FALSE;              return FALSE;
3524            }            }
# Line 3026  for (;;) Line 3533  for (;;)
3533            eptr++;            eptr++;
3534            }            }
3535          while (eptr >= pp)          while (eptr >= pp)
3536            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3537                return TRUE;
3538          return FALSE;          return FALSE;
3539          }          }
3540        /* Control never gets here */        /* Control never gets here */
# Line 3042  for (;;) Line 3550  for (;;)
3550          {          {
3551          for (i = min;; i++)          for (i = min;; i++)
3552            {            {
3553            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3554                return TRUE;
3555            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE;
3556            }            }
3557          /* Control never gets here */          /* Control never gets here */
# Line 3056  for (;;) Line 3565  for (;;)
3565            eptr++;            eptr++;
3566            }            }
3567          while (eptr >= pp)          while (eptr >= pp)
3568           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3569               return TRUE;
3570          return FALSE;          return FALSE;
3571          }          }
3572        }        }
# Line 3067  for (;;) Line 3577  for (;;)
3577      case OP_NOT:      case OP_NOT:
3578      if (eptr >= md->end_subject) return FALSE;      if (eptr >= md->end_subject) return FALSE;
3579      ecode++;      ecode++;
3580      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3581        {        {
3582        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;        if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE;
3583        }        }
# Line 3127  for (;;) Line 3637  for (;;)
3637      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,      DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max,
3638        max, eptr));        max, eptr));
3639    
3640      if (md->caseless)      if ((ims & PCRE_CASELESS) != 0)
3641        {        {
3642        c = pcre_lcc[c];        c = pcre_lcc[c];
3643        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;        for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE;
# Line 3136  for (;;) Line 3646  for (;;)
3646          {          {
3647          for (i = min;; i++)          for (i = min;; i++)
3648            {            {
3649            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3650                return TRUE;
3651            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])            if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++])
3652              return FALSE;              return FALSE;
3653            }            }
# Line 3151  for (;;) Line 3662  for (;;)
3662            eptr++;            eptr++;
3663            }            }
3664          while (eptr >= pp)          while (eptr >= pp)
3665            if (match(eptr--, ecode, offset_top, md)) return TRUE;            if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3666                return TRUE;
3667          return FALSE;          return FALSE;
3668          }          }
3669        /* Control never gets here */        /* Control never gets here */
# Line 3167  for (;;) Line 3679  for (;;)
3679          {          {
3680          for (i = min;; i++)          for (i = min;; i++)
3681            {            {
3682            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb))
3683                return TRUE;
3684            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;            if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE;
3685            }            }
3686          /* Control never gets here */          /* Control never gets here */
# Line 3181  for (;;) Line 3694  for (;;)
3694            eptr++;            eptr++;
3695            }            }
3696          while (eptr >= pp)          while (eptr >= pp)
3697           if (match(eptr--, ecode, offset_top, md)) return TRUE;           if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3698               return TRUE;
3699          return FALSE;          return FALSE;
3700          }          }
3701        }        }
# Line 3231  for (;;) Line 3745  for (;;)
3745      if (min > 0) switch(ctype)      if (min > 0) switch(ctype)
3746        {        {
3747        case OP_ANY:        case OP_ANY:
3748        if (!md->dotall)        if ((ims & PCRE_DOTALL) == 0)
3749          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }          { for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; }
3750        else eptr += min;        else eptr += min;
3751        break;        break;
# Line 3278  for (;;) Line 3792  for (;;)
3792        {        {
3793        for (i = min;; i++)        for (i = min;; i++)
3794          {          {
3795          if (match(eptr, ecode, offset_top, md)) return TRUE;          if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE;
3796          if (i >= max || eptr >= md->end_subject ||          if (i >= max || eptr >= md->end_subject ||
3797            !match_type(ctype, *eptr++, md->dotall))            !match_type(ctype, *eptr++, (ims & PCRE_DOTALL) != 0))
3798              return FALSE;              return FALSE;
3799          }          }
3800        /* Control never gets here */        /* Control never gets here */
# Line 3295  for (;;) Line 3809  for (;;)
3809        switch(ctype)        switch(ctype)
3810          {          {
3811          case OP_ANY:          case OP_ANY:
3812          if (!md->dotall)          if ((ims & PCRE_DOTALL) == 0)
3813            {            {
3814            for (i = min; i < max; i++)            for (i = min; i < max; i++)
3815              {              {
# Line 3367  for (;;) Line 3881  for (;;)
3881          }          }
3882    
3883        while (eptr >= pp)        while (eptr >= pp)
3884          if (match(eptr--, ecode, offset_top, md)) return TRUE;          if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb))
3885              return TRUE;
3886        return FALSE;        return FALSE;
3887        }        }
3888      /* Control never gets here */      /* Control never gets here */
# Line 3390  for (;;) Line 3905  for (;;)
3905    
3906    
3907    
 /*************************************************  
 *         Segregate setjmp()                     *  
 *************************************************/  
   
 /* The -Wall option of gcc gives warnings for all local variables when setjmp()  
 is used, even if the coding conforms to the rules of ANSI C. To avoid this, we  
 hide it in a separate function. This is called only when PCRE_EXTRA is set,  
 since it's needed only for the extension \X option, and with any luck, a good  
 compiler will spot the tail recursion and compile it efficiently.  
   
 Arguments:  
    eptr        pointer in subject  
    ecode       position in code  
    offset_top  current top pointer  
    md          pointer to "static" info for the match  
   
 Returns:       TRUE if matched  
 */  
   
 static BOOL  
 match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,  
   match_data *match_block)  
 {  
 return setjmp(match_block->fail_env) == 0 &&  
       match(eptr, ecode, offset_top, match_block);  
 }  
   
   
3908    
3909  /*************************************************  /*************************************************
3910  *         Execute a Regular Expression           *  *         Execute a Regular Expression           *
# Line 3448  pcre_exec(const pcre *external_re, const Line 3935  pcre_exec(const pcre *external_re, const
3935  {  {
3936  int resetcount, ocount;  int resetcount, ocount;
3937  int first_char = -1;  int first_char = -1;
3938    int ims = 0;
3939  match_data match_block;  match_data match_block;
3940  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
3941  const uschar *start_match = (const uschar *)subject;  const uschar *start_match = (const uschar *)subject;
# Line 3468  match_block.start_subject = (const uscha Line 3956  match_block.start_subject = (const uscha
3956  match_block.end_subject = match_block.start_subject + length;  match_block.end_subject = match_block.start_subject + length;
3957  end_subject = match_block.end_subject;  end_subject = match_block.end_subject;
3958    
3959  match_block.caseless  = ((re->options | options) & PCRE_CASELESS) != 0;  match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
 match_block.runtime_caseless = match_block.caseless &&  
   (re->options & PCRE_CASELESS) == 0;  
   
 match_block.multiline = ((re->options | options) & PCRE_MULTILINE) != 0;  
 match_block.dotall    = ((re->options | options) & PCRE_DOTALL) != 0;  
 match_block.endonly   = ((re->options | options) & PCRE_DOLLAR_ENDONLY) != 0;  
3960    
3961  match_block.notbol = (options & PCRE_NOTBOL) != 0;  match_block.notbol = (options & PCRE_NOTBOL) != 0;
3962  match_block.noteol = (options & PCRE_NOTEOL) != 0;  match_block.noteol = (options & PCRE_NOTEOL) != 0;
3963    
3964  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
3965    
3966    /* The ims options can vary during the matching as a result of the presence
3967    of (?ims) items in the pattern. They are kept in a local variable so that
3968    restoring at the exit of a group is easy. */
3969    
3970    ims = re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL);
3971    
3972  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3973  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3974  Otherwise, we can use the vector supplied, rounding down its size to a multiple  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3975  of 2. */  of 3. */
3976    
3977  ocount = offsetcount & (-2);  ocount = offsetcount - (offsetcount % 3);
3978  if (re->top_backref > 0 && re->top_backref >= ocount/2)  
3979    if (re->top_backref > 0 && re->top_backref >= ocount/3)
3980    {    {
3981    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 3 + 3;
3982    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int));
3983    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3984    using_temporary_offsets = TRUE;    using_temporary_offsets = TRUE;
# Line 3498  if (re->top_backref > 0 && re->top_backr Line 3987  if (re->top_backref > 0 && re->top_backr
3987  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
3988    
3989  match_block.offset_end = ocount;  match_block.offset_end = ocount;
3990    match_block.offset_max = (2*ocount)/3;
3991  match_block.offset_overflow = FALSE;  match_block.offset_overflow = FALSE;
3992    
3993  /* Compute the minimum number of offsets that we need to reset each time. Doing  /* Compute the minimum number of offsets that we need to reset each time. Doing
# Line 3507  in the pattern. */ Line 3997  in the pattern. */
3997  resetcount = 2 + re->top_bracket * 2;  resetcount = 2 + re->top_bracket * 2;
3998  if (resetcount > offsetcount) resetcount = ocount;  if (resetcount > offsetcount) resetcount = ocount;
3999    
 /* If MULTILINE is set at exec time but was not set at compile time, and the  
 anchored flag is set, we must re-check because a setting provoked by ^ in the  
 pattern is not right in multi-line mode. Calling is_anchored() again here does  
 the right check, because multiline is now set. If it now yields FALSE, the  
 expression must have had ^ starting some of its branches. Check to see if  
 that is true for *all* branches, and if so, set the startline flag. */  
   
 if (match_block. multiline && anchored && (re->options & PCRE_MULTILINE) == 0 &&  
     !is_anchored(re->code, match_block.multiline))  
   {  
   anchored = FALSE;  
   if (is_startline(re->code)) startline = TRUE;  
   }  
   
4000  /* Set up the first character to match, if available. The first_char value is  /* Set up the first character to match, if available. The first_char value is
4001  never set for an anchored regular expression, but the anchoring may be forced  never set for an anchored regular expression, but the anchoring may be forced
4002  at run time, so we have to test for anchoring. The first char may be unset for  at run time, so we have to test for anchoring. The first char may be unset for
4003  an unanchored pattern, of course. If there's no first char and the pattern was  an unanchored pattern, of course. If there's no first char and the pattern was
4004  studied, the may be a bitmap of possible first characters. However, we can  studied, there may be a bitmap of possible first characters. */
 use this only if the caseless state of the studying was correct. */  
4005    
4006  if (!anchored)  if (!anchored)
4007    {    {
4008    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->options & PCRE_FIRSTSET) != 0)
4009      {      {
4010      first_char = re->first_char;      first_char = re->first_char;
4011      if (match_block.caseless) first_char = pcre_lcc[first_char];      if ((ims & PCRE_CASELESS) != 0) first_char = pcre_lcc[first_char];
4012      }      }
4013    else    else
4014      if (!startline && extra != NULL &&      if (!startline && extra != NULL &&
4015        (extra->options & PCRE_STUDY_MAPPED) != 0 &&        (extra->options & PCRE_STUDY_MAPPED) != 0)
       ((extra->options & PCRE_STUDY_CASELESS) != 0) == match_block.caseless)  
4016          start_bits = extra->start_bits;          start_bits = extra->start_bits;
4017    }    }
4018    
# Line 3558  do Line 4032  do
4032    
4033    if (first_char >= 0)    if (first_char >= 0)
4034      {      {
4035      if (match_block.caseless)      if ((ims & PCRE_CASELESS) != 0)
4036        while (start_match < end_subject && pcre_lcc[*start_match] != first_char)        while (start_match < end_subject && pcre_lcc[*start_match] != first_char)
4037          start_match++;          start_match++;
4038      else      else
# Line 3599  do Line 4073  do
4073    there were too many extractions, set the return code to zero. In the case    there were too many extractions, set the return code to zero. In the case
4074    where we had to get some local store to hold offsets for backreferences, copy    where we had to get some local store to hold offsets for backreferences, copy
4075    those back references that we can. In this case there need not be overflow    those back references that we can. In this case there need not be overflow
4076    if certain parts of the pattern were not used.    if certain parts of the pattern were not used. */
   
   Before starting the match, we have to set up a longjmp() target to enable  
   the "cut" operation to fail a match completely without backtracking. This  
   is done in a separate function to avoid compiler warnings. We need not do  
   it unless PCRE_EXTRA is set, since only in that case is the "cut" operation  
   enabled. */  
4077    
4078    if ((re->options & PCRE_EXTRA) != 0)    if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))
4079      {      continue;
     if (!match_with_setjmp(start_match, re->code, 2, &match_block))  
       continue;  
     }  
   else if (!match(start_match, re->code, 2, &match_block)) continue;  
4080    
4081    /* Copy the offset information from temporary store if necessary */    /* Copy the offset information from temporary store if necessary */
4082    

Legend:
Removed from v.21  
changed lines
  Added in v.23

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12