/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 35 by nigel, Sat Feb 24 21:39:05 2007 UTC revision 39 by nigel, Sat Feb 24 21:39:13 2007 UTC
# Line 111  static const short int escapes[] = { Line 111  static const short int escapes[] = {
111    
112  static BOOL  static BOOL
113    compile_regex(int, int, int *, uschar **, const uschar **, const char **,    compile_regex(int, int, int *, uschar **, const uschar **, const char **,
114      BOOL, int, compile_data *);      BOOL, int, int *, int *, compile_data *);
115    
116    
117    
# Line 148  tables. */ Line 148  tables. */
148  *          Return version string                 *  *          Return version string                 *
149  *************************************************/  *************************************************/
150    
151    #define STRING(a)  # a
152    #define XSTRING(s) STRING(s)
153    
154  const char *  const char *
155  pcre_version(void)  pcre_version(void)
156  {  {
157  return PCRE_VERSION;  return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE);
158  }  }
159    
160    
# Line 162  return PCRE_VERSION; Line 165  return PCRE_VERSION;
165  *************************************************/  *************************************************/
166    
167  /* This function picks potentially useful data out of the private  /* This function picks potentially useful data out of the private
168  structure.  structure. The public options are passed back in an int - though the
169    re->options field has been expanded to a long int, all the public options
170    are at the low end of it, and so even on 16-bit systems this will still be OK.
171    Therefore, I haven't changed the API for pcre_info().
172    
173  Arguments:  Arguments:
174    external_re   points to compiled code    external_re   points to compiled code
# Line 181  pcre_info(const pcre *external_re, int * Line 187  pcre_info(const pcre *external_re, int *
187  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
188  if (re == NULL) return PCRE_ERROR_NULL;  if (re == NULL) return PCRE_ERROR_NULL;
189  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;  if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC;
190  if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS);  if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS);
191  if (first_char != NULL)  if (first_char != NULL)
192    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :    *first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char :
193       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;       ((re->options & PCRE_STARTLINE) != 0)? -1 : -2;
# Line 532  for (;;) Line 538  for (;;)
538    
539      case OP_REVERSE:      case OP_REVERSE:
540      cc++;      cc++;
541        /* Fall through */
542    
543      case OP_CREF:      case OP_CREF:
544      case OP_OPT:      case OP_OPT:
# Line 627  Arguments: Line 634  Arguments:
634    ptrptr       points to the current pattern pointer    ptrptr       points to the current pattern pointer
635    errorptr     points to pointer to error message    errorptr     points to pointer to error message
636    optchanged   set to the value of the last OP_OPT item compiled    optchanged   set to the value of the last OP_OPT item compiled
637      reqchar      set to the last literal character required, else -1
638      countlits    set to count of mandatory literal characters
639    cd           contains pointers to tables    cd           contains pointers to tables
640    
641  Returns:       TRUE on success  Returns:       TRUE on success
# Line 636  Returns: TRUE on success Line 645  Returns: TRUE on success
645  static BOOL  static BOOL
646  compile_branch(int options, int *brackets, uschar **codeptr,  compile_branch(int options, int *brackets, uschar **codeptr,
647    const uschar **ptrptr, const char **errorptr, int *optchanged,    const uschar **ptrptr, const char **errorptr, int *optchanged,
648    compile_data *cd)    int *reqchar, int *countlits, compile_data *cd)
649  {  {
650  int repeat_type, op_type;  int repeat_type, op_type;
651  int repeat_min, repeat_max;  int repeat_min, repeat_max;
652  int bravalue, length;  int bravalue, length;
653  int greedy_default, greedy_non_default;  int greedy_default, greedy_non_default;
654    int prevreqchar;
655    int condcount = 0;
656    int subcountlits = 0;
657  register int c;  register int c;
658  register uschar *code = *codeptr;  register uschar *code = *codeptr;
659  uschar *tempcode;  uschar *tempcode;
# Line 655  uschar class[32]; Line 667  uschar class[32];
667  greedy_default = ((options & PCRE_UNGREEDY) != 0);  greedy_default = ((options & PCRE_UNGREEDY) != 0);
668  greedy_non_default = greedy_default ^ 1;  greedy_non_default = greedy_default ^ 1;
669    
670    /* Initialize no required char, and count of literals */
671    
672    *reqchar = prevreqchar = -1;
673    *countlits = 0;
674    
675  /* Switch on next character until the end of the branch */  /* Switch on next character until the end of the branch */
676    
677  for (;; ptr++)  for (;; ptr++)
# Line 664  for (;; ptr++) Line 681  for (;; ptr++)
681    int class_lastchar;    int class_lastchar;
682    int newoptions;    int newoptions;
683    int condref;    int condref;
684      int subreqchar;
685    
686    c = *ptr;    c = *ptr;
687    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
# Line 937  for (;; ptr++) Line 955  for (;; ptr++)
955        { repeat_type = greedy_non_default; ptr++; }        { repeat_type = greedy_non_default; ptr++; }
956      else repeat_type = greedy_default;      else repeat_type = greedy_default;
957    
     /* If the maximum is zero then the minimum must also be zero; Perl allows  
     this case, so we do too - by simply omitting the item altogether. */  
   
     if (repeat_max == 0) code = previous;  
   
958      /* If previous was a string of characters, chop off the last one and use it      /* If previous was a string of characters, chop off the last one and use it
959      as the subject of the repeat. If there was only one character, we can      as the subject of the repeat. If there was only one character, we can
960      abolish the previous item altogether. */      abolish the previous item altogether. A repeat with a zero minimum wipes
961        out any reqchar setting, backing up to the previous value. We must also
962        adjust the countlits value. */
963    
964      else if (*previous == OP_CHARS)      if (*previous == OP_CHARS)
965        {        {
966        int len = previous[1];        int len = previous[1];
967    
968          if (repeat_min == 0) *reqchar = prevreqchar;
969          *countlits += repeat_min - 1;
970    
971        if (len == 1)        if (len == 1)
972          {          {
973          c = previous[2];          c = previous[2];
# Line 987  for (;; ptr++) Line 1006  for (;; ptr++)
1006        code = previous;        code = previous;
1007    
1008        OUTPUT_SINGLE_REPEAT:        OUTPUT_SINGLE_REPEAT:
1009        repeat_type += op_type;      /* Combine both values for many cases */  
1010          /* If the maximum is zero then the minimum must also be zero; Perl allows
1011          this case, so we do too - by simply omitting the item altogether. */
1012    
1013          if (repeat_max == 0) goto END_REPEAT;
1014    
1015          /* Combine the op_type with the repeat_type */
1016    
1017          repeat_type += op_type;
1018    
1019        /* A minimum of zero is handled either as the special case * or ?, or as        /* A minimum of zero is handled either as the special case * or ?, or as
1020        an UPTO, with the maximum given. */        an UPTO, with the maximum given. */
# Line 1064  for (;; ptr++) Line 1091  for (;; ptr++)
1091        }        }
1092    
1093      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1094      stuff after it. */      stuff after it, but just skip the item if the repeat was {0,0}. */
1095    
1096      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_REF)
1097        {        {
1098          if (repeat_max == 0)
1099            {
1100            code = previous;
1101            goto END_REPEAT;
1102            }
1103        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1104          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
1105        else if (repeat_min == 1 && repeat_max == -1)        else if (repeat_min == 1 && repeat_max == -1)
# Line 1118  for (;; ptr++) Line 1150  for (;; ptr++)
1150    
1151        if (repeat_min == 0)        if (repeat_min == 0)
1152          {          {
1153            /* If we set up a required char from the bracket, we must back off
1154            to the previous value and reset the countlits value too. */
1155    
1156            if (subcountlits > 0)
1157              {
1158              *reqchar = prevreqchar;
1159              *countlits -= subcountlits;
1160              }
1161    
1162          /* If the maximum is also zero, we just omit the group from the output          /* If the maximum is also zero, we just omit the group from the output
1163          altogether. */          altogether. */
1164    
1165          if (repeat_max == 0)          if (repeat_max == 0)
1166            {            {
1167            code = previous;            code = previous;
1168            previous = NULL;            goto END_REPEAT;
           break;  
1169            }            }
1170    
1171          /* If the maximum is 1 or unlimited, we just have to stick in the          /* If the maximum is 1 or unlimited, we just have to stick in the
# Line 1230  for (;; ptr++) Line 1270  for (;; ptr++)
1270        correct offset was computed above. */        correct offset was computed above. */
1271    
1272        else code[-ketoffset] = OP_KETRMAX + repeat_type;        else code[-ketoffset] = OP_KETRMAX + repeat_type;
   
   
 #ifdef NEVER  
       /* If the minimum is greater than zero, and the maximum is unlimited or  
       equal to the minimum, the first copy remains where it is, and is  
       replicated up to the minimum number of times. This case includes the +  
       repeat, but of course no replication is needed in that case. */  
   
       if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min))  
         {  
         for (i = 1; i < repeat_min; i++)  
           {  
           memcpy(code, previous, len);  
           code += len;  
           }  
         }  
   
       /* If the minimum is zero, stick BRAZERO in front of the first copy.  
       Then, if there is a fixed upper limit, replicated up to that many times,  
       sticking BRAZERO in front of all the optional ones. */  
   
       else  
         {  
         if (repeat_min == 0)  
           {  
           memmove(previous+1, previous, len);  
           code++;  
           *previous++ = OP_BRAZERO + repeat_type;  
           }  
   
         for (i = 1; i < repeat_min; i++)  
           {  
           memcpy(code, previous, len);  
           code += len;  
           }  
   
         for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++)  
           {  
           *code++ = OP_BRAZERO + repeat_type;  
           memcpy(code, previous, len);  
           code += len;  
           }  
         }  
   
       /* If the maximum is unlimited, set a repeater in the final copy. We  
       can't just offset backwards from the current code point, because we  
       don't know if there's been an options resetting after the ket. The  
       correct offset was computed above. */  
   
       if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;  
 #endif  
   
   
1273        }        }
1274    
1275      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
# Line 1295  for (;; ptr++) Line 1282  for (;; ptr++)
1282    
1283      /* In all cases we no longer have a previous item. */      /* In all cases we no longer have a previous item. */
1284    
1285        END_REPEAT:
1286      previous = NULL;      previous = NULL;
1287      break;      break;
1288    
# Line 1463  for (;; ptr++) Line 1451  for (;; ptr++)
1451           (bravalue == OP_ASSERTBACK ||           (bravalue == OP_ASSERTBACK ||
1452            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */            bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */
1453           condref,                      /* Condition reference number */           condref,                      /* Condition reference number */
1454             &subreqchar,                  /* For possible last char */
1455             &subcountlits,                /* For literal count */
1456           cd))                          /* Tables block */           cd))                          /* Tables block */
1457        goto FAILED;        goto FAILED;
1458    
# Line 1476  for (;; ptr++) Line 1466  for (;; ptr++)
1466    
1467      if (bravalue == OP_COND)      if (bravalue == OP_COND)
1468        {        {
       int branchcount = 0;  
1469        uschar *tc = code;        uschar *tc = code;
1470          condcount = 0;
1471    
1472        do {        do {
1473           branchcount++;           condcount++;
1474           tc += (tc[1] << 8) | tc[2];           tc += (tc[1] << 8) | tc[2];
1475           }           }
1476        while (*tc != OP_KET);        while (*tc != OP_KET);
1477    
1478        if (branchcount > 2)        if (condcount > 2)
1479          {          {
1480          *errorptr = ERR27;          *errorptr = ERR27;
1481          goto FAILED;          goto FAILED;
1482          }          }
1483        }        }
1484    
1485        /* Handle updating of the required character. If the subpattern didn't
1486        set one, leave it as it was. Otherwise, update it for normal brackets of
1487        all kinds, forward assertions, and conditions with two branches. Don't
1488        update the literal count for forward assertions, however. If the bracket
1489        is followed by a quantifier with zero repeat, we have to back off. Hence
1490        the definition of prevreqchar and subcountlits outside the main loop so
1491        that they can be accessed for the back off. */
1492    
1493        if (subreqchar > 0 &&
1494             (bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT ||
1495             (bravalue == OP_COND && condcount == 2)))
1496          {
1497          prevreqchar = *reqchar;
1498          *reqchar = subreqchar;
1499          if (bravalue != OP_ASSERT) *countlits += subcountlits;
1500          }
1501    
1502      /* Now update the main code pointer to the end of the group. */      /* Now update the main code pointer to the end of the group. */
1503    
1504      code = tempcode;      code = tempcode;
# Line 1586  for (;; ptr++) Line 1593  for (;; ptr++)
1593    
1594      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);      while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0);
1595    
1596        /* Update the last character and the count of literals */
1597    
1598        prevreqchar = (length > 1)? code[-2] : *reqchar;
1599        *reqchar = code[-1];
1600        *countlits += length;
1601    
1602      /* Compute the length and set it in the data vector, and advance to      /* Compute the length and set it in the data vector, and advance to
1603      the next state. */      the next state. */
1604    
# Line 1629  Argument: Line 1642  Argument:
1642    errorptr    -> pointer to error message    errorptr    -> pointer to error message
1643    lookbehind  TRUE if this is a lookbehind assertion    lookbehind  TRUE if this is a lookbehind assertion
1644    condref     > 0 for OP_CREF setting at start of conditional group    condref     > 0 for OP_CREF setting at start of conditional group
1645      reqchar     -> place to put the last required character, or a negative number
1646      countlits   -> place to put the shortest literal count of any branch
1647    cd          points to the data block with tables pointers    cd          points to the data block with tables pointers
1648    
1649  Returns:      TRUE on success  Returns:      TRUE on success
# Line 1637  Returns: TRUE on success Line 1652  Returns: TRUE on success
1652  static BOOL  static BOOL
1653  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,  compile_regex(int options, int optchanged, int *brackets, uschar **codeptr,
1654    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,    const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref,
1655    compile_data *cd)    int *reqchar, int *countlits, compile_data *cd)
1656  {  {
1657  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
1658  uschar *code = *codeptr;  uschar *code = *codeptr;
# Line 1645  uschar *last_branch = code; Line 1660  uschar *last_branch = code;
1660  uschar *start_bracket = code;  uschar *start_bracket = code;
1661  uschar *reverse_count = NULL;  uschar *reverse_count = NULL;
1662  int oldoptions = options & PCRE_IMS;  int oldoptions = options & PCRE_IMS;
1663    int branchreqchar, branchcountlits;
1664    
1665    *reqchar = -1;
1666    *countlits = INT_MAX;
1667  code += 3;  code += 3;
1668    
1669  /* At the start of a reference-based conditional group, insert the reference  /* At the start of a reference-based conditional group, insert the reference
# Line 1684  for (;;) Line 1702  for (;;)
1702    
1703    /* Now compile the branch */    /* Now compile the branch */
1704    
1705    if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd))    if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged,
1706          &branchreqchar, &branchcountlits, cd))
1707      {      {
1708      *ptrptr = ptr;      *ptrptr = ptr;
1709      return FALSE;      return FALSE;
# Line 1696  for (;;) Line 1715  for (;;)
1715    last_branch[1] = length >> 8;    last_branch[1] = length >> 8;
1716    last_branch[2] = length & 255;    last_branch[2] = length & 255;
1717    
1718      /* Save the last required character if all branches have the same; a current
1719      value of -1 means unset, while -2 means "previous branch had no last required
1720      char".  */
1721    
1722      if (*reqchar != -2)
1723        {
1724        if (branchreqchar >= 0)
1725          {
1726          if (*reqchar == -1) *reqchar = branchreqchar;
1727          else if (*reqchar != branchreqchar) *reqchar = -2;
1728          }
1729        else *reqchar = -2;
1730        }
1731    
1732      /* Keep the shortest literal count */
1733    
1734      if (branchcountlits < *countlits) *countlits = branchcountlits;
1735      DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits));
1736    
1737    /* If lookbehind, check that this branch matches a fixed-length string,    /* If lookbehind, check that this branch matches a fixed-length string,
1738    and put the length into the OP_REVERSE item. Temporarily mark the end of    and put the length into the OP_REVERSE item. Temporarily mark the end of
1739    the branch with OP_END. */    the branch with OP_END. */
# Line 1977  pcre_compile(const char *pattern, int op Line 2015  pcre_compile(const char *pattern, int op
2015  real_pcre *re;  real_pcre *re;
2016  int length = 3;      /* For initial BRA plus length */  int length = 3;      /* For initial BRA plus length */
2017  int runlength;  int runlength;
2018  int c, size;  int c, size, reqchar, countlits;
2019  int bracount = 0;  int bracount = 0;
2020  int top_backref = 0;  int top_backref = 0;
2021  int branch_extra = 0;  int branch_extra = 0;
# Line 2317  while ((c = *(++ptr)) != 0) Line 2355  while ((c = *(++ptr)) != 0)
2355              will lead to an over-estimate on the length, but this shouldn't              will lead to an over-estimate on the length, but this shouldn't
2356              matter very much. We also have to allow for resetting options at              matter very much. We also have to allow for resetting options at
2357              the start of any alternations, which we do by setting              the start of any alternations, which we do by setting
2358              branch_newextra to 2. */              branch_newextra to 2. Finally, we record whether the case-dependent
2359                flag ever changes within the regex. This is used by the "required
2360                character" code. */
2361    
2362              case ':':              case ':':
2363              if (((set|unset) & PCRE_IMS) != 0)              if (((set|unset) & PCRE_IMS) != 0)
2364                {                {
2365                length += 4;                length += 4;
2366                branch_newextra = 2;                branch_newextra = 2;
2367                  if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED;
2368                }                }
2369              goto END_OPTIONS;              goto END_OPTIONS;
2370    
# Line 2524  code = re->code; Line 2565  code = re->code;
2565  *code = OP_BRA;  *code = OP_BRA;
2566  bracount = 0;  bracount = 0;
2567  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,  (void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1,
2568    &compile_block);    &reqchar, &countlits, &compile_block);
2569  re->top_bracket = bracount;  re->top_bracket = bracount;
2570  re->top_backref = top_backref;  re->top_backref = top_backref;
2571    
# Line 2584  if ((options & PCRE_ANCHORED) == 0) Line 2625  if ((options & PCRE_ANCHORED) == 0)
2625      }      }
2626    }    }
2627    
2628    /* Save the last required character if there are at least two literal
2629    characters on all paths, or if there is no first character setting. */
2630    
2631    if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0))
2632      {
2633      re->req_char = reqchar;
2634      re->options |= PCRE_REQCHSET;
2635      }
2636    
2637  /* Print out the compiled data for debugging */  /* Print out the compiled data for debugging */
2638    
2639  #ifdef DEBUG  #ifdef DEBUG
# Line 2593  printf("Length = %d top_bracket = %d top Line 2643  printf("Length = %d top_bracket = %d top
2643    
2644  if (re->options != 0)  if (re->options != 0)
2645    {    {
2646    printf("%s%s%s%s%s%s%s%s\n",    printf("%s%s%s%s%s%s%s%s%s\n",
2647      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",      ((re->options & PCRE_ANCHORED) != 0)? "anchored " : "",
2648      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",      ((re->options & PCRE_CASELESS) != 0)? "caseless " : "",
2649        ((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "",
2650      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",      ((re->options & PCRE_EXTENDED) != 0)? "extended " : "",
2651      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",      ((re->options & PCRE_MULTILINE) != 0)? "multiline " : "",
2652      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",      ((re->options & PCRE_DOTALL) != 0)? "dotall " : "",
# Line 2610  if ((re->options & PCRE_FIRSTSET) != 0) Line 2661  if ((re->options & PCRE_FIRSTSET) != 0)
2661      else printf("First char = \\x%02x\n", re->first_char);      else printf("First char = \\x%02x\n", re->first_char);
2662    }    }
2663    
2664    if ((re->options & PCRE_REQCHSET) != 0)
2665      {
2666      if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char);
2667        else printf("Req char = \\x%02x\n", re->req_char);
2668      }
2669    
2670  code_end = code;  code_end = code;
2671  code_base = code = re->code;  code_base = code = re->code;
2672    
# Line 2843  Returns: TRUE if matched Line 2900  Returns: TRUE if matched
2900    
2901  static BOOL  static BOOL
2902  match_ref(int offset, register const uschar *eptr, int length, match_data *md,  match_ref(int offset, register const uschar *eptr, int length, match_data *md,
2903    int ims)    unsigned long int ims)
2904  {  {
2905  const uschar *p = md->start_subject + md->offset_vector[offset];  const uschar *p = md->start_subject + md->offset_vector[offset];
2906    
# Line 2902  Returns: TRUE if matched Line 2959  Returns: TRUE if matched
2959    
2960  static BOOL  static BOOL
2961  match(register const uschar *eptr, register const uschar *ecode,  match(register const uschar *eptr, register const uschar *ecode,
2962    int offset_top, match_data *md, int ims, BOOL condassert, const uschar *eptrb)    int offset_top, match_data *md, unsigned long int ims, BOOL condassert,
2963      const uschar *eptrb)
2964  {  {
2965  int original_ims = ims;   /* Save for resetting on ')' */  unsigned long int original_ims = ims;   /* Save for resetting on ')' */
2966    
2967  for (;;)  for (;;)
2968    {    {
# Line 3019  for (;;) Line 3077  for (;;)
3077      ecode += 2;      ecode += 2;
3078      break;      break;
3079    
3080      /* End of the pattern */      /* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched
3081        an empty string - recursion will then try other alternatives, if any. */
3082    
3083      case OP_END:      case OP_END:
3084        if (md->notempty && eptr == md->start_match) return FALSE;
3085      md->end_match_ptr = eptr;          /* Record where we ended */      md->end_match_ptr = eptr;          /* Record where we ended */
3086      md->end_offset_top = offset_top;   /* and how many extracts were taken */      md->end_offset_top = offset_top;   /* and how many extracts were taken */
3087      return TRUE;      return TRUE;
# Line 3031  for (;;) Line 3091  for (;;)
3091      case OP_OPT:      case OP_OPT:
3092      ims = ecode[1];      ims = ecode[1];
3093      ecode += 2;      ecode += 2;
3094      DPRINTF(("ims set to %02x\n", ims));      DPRINTF(("ims set to %02lx\n", ims));
3095      break;      break;
3096    
3097      /* Assertion brackets. Check the alternative branches in turn - the      /* Assertion brackets. Check the alternative branches in turn - the
# Line 3138  for (;;) Line 3198  for (;;)
3198        if (ecode[3] == OP_OPT)        if (ecode[3] == OP_OPT)
3199          {          {
3200          ims = (ims & ~PCRE_IMS) | ecode[4];          ims = (ims & ~PCRE_IMS) | ecode[4];
3201          DPRINTF(("ims set to %02x at group repeat\n", ims));          DPRINTF(("ims set to %02lx at group repeat\n", ims));
3202          }          }
3203    
3204        if (*ecode == OP_KETRMIN)        if (*ecode == OP_KETRMIN)
# Line 3232  for (;;) Line 3292  for (;;)
3292        the group. */        the group. */
3293    
3294        ims = original_ims;        ims = original_ims;
3295        DPRINTF(("ims reset to %02x\n", ims));        DPRINTF(("ims reset to %02lx\n", ims));
3296    
3297        /* For a non-repeating ket, just continue at this level. This also        /* For a non-repeating ket, just continue at this level. This also
3298        happens for a repeating ket if no characters were matched in the group.        happens for a repeating ket if no characters were matched in the group.
# Line 4136  pcre_exec(const pcre *external_re, const Line 4196  pcre_exec(const pcre *external_re, const
4196  {  {
4197  int resetcount, ocount;  int resetcount, ocount;
4198  int first_char = -1;  int first_char = -1;
4199  int ims = 0;  int req_char = -1;
4200    int req_char2 = -1;
4201    unsigned long int ims = 0;
4202  match_data match_block;  match_data match_block;
4203  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
4204  const uschar *start_match = (const uschar *)subject + start_offset;  const uschar *start_match = (const uschar *)subject + start_offset;
4205  const uschar *end_subject;  const uschar *end_subject;
4206    const uschar *req_char_ptr = start_match - 1;
4207  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
4208  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
4209  BOOL using_temporary_offsets = FALSE;  BOOL using_temporary_offsets = FALSE;
# Line 4161  match_block.endonly = (re->options & PCR Line 4224  match_block.endonly = (re->options & PCR
4224    
4225  match_block.notbol = (options & PCRE_NOTBOL) != 0;  match_block.notbol = (options & PCRE_NOTBOL) != 0;
4226  match_block.noteol = (options & PCRE_NOTEOL) != 0;  match_block.noteol = (options & PCRE_NOTEOL) != 0;
4227    match_block.notempty = (options & PCRE_NOTEMPTY) != 0;
4228    
4229  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */  match_block.errorcode = PCRE_ERROR_NOMATCH;     /* Default error */
4230    
# Line 4231  if (!anchored) Line 4295  if (!anchored)
4295          start_bits = extra->start_bits;          start_bits = extra->start_bits;
4296    }    }
4297    
4298  /* Loop for unanchored matches; for anchored regexs the loop runs just once. */  /* For anchored or unanchored matches, there may be a "last known required
4299    character" set. If the PCRE_CASELESS is set, implying that the match starts
4300    caselessly, or if there are any changes of this flag within the regex, set up
4301    both cases of the character. Otherwise set the two values the same, which will
4302    avoid duplicate testing (which takes significant time). This covers the vast
4303    majority of cases. It will be suboptimal when the case flag changes in a regex
4304    and the required character in fact is caseful. */
4305    
4306    if ((re->options & PCRE_REQCHSET) != 0)
4307      {
4308      req_char = re->req_char;
4309      req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0)?
4310        (re->tables + fcc_offset)[req_char] : req_char;
4311      }
4312    
4313    /* Loop for handling unanchored repeated matching attempts; for anchored regexs
4314    the loop runs just once. */
4315    
4316  do  do
4317    {    {
# Line 4260  do Line 4340  do
4340    
4341    else if (startline)    else if (startline)
4342      {      {
4343      if (start_match > match_block.start_subject)      if (start_match > match_block.start_subject + start_offset)
4344        {        {
4345        while (start_match < end_subject && start_match[-1] != '\n')        while (start_match < end_subject && start_match[-1] != '\n')
4346          start_match++;          start_match++;
4347        }        }
4348      }      }
4349    
4350    /* Or to a non-unique first char */    /* Or to a non-unique first char after study */
4351    
4352    else if (start_bits != NULL)    else if (start_bits != NULL)
4353      {      {
# Line 4284  do Line 4364  do
4364    printf("\n");    printf("\n");
4365  #endif  #endif
4366    
4367      /* If req_char is set, we know that that character must appear in the subject
4368      for the match to succeed. If the first character is set, req_char must be
4369      later in the subject; otherwise the test starts at the match point. This
4370      optimization can save a huge amount of backtracking in patterns with nested
4371      unlimited repeats that aren't going to match. We don't know what the state of
4372      case matching may be when this character is hit, so test for it in both its
4373      cases if necessary. However, the different cased versions will not be set up
4374      unless PCRE_CASELESS was given or the casing state changes within the regex.
4375      Writing separate code makes it go faster, as does using an autoincrement and
4376      backing off on a match. */
4377    
4378      if (req_char >= 0)
4379        {
4380        register const uschar *p = start_match + ((first_char >= 0)? 1 : 0);
4381    
4382        /* We don't need to repeat the search if we haven't yet reached the
4383        place we found it at last time. */
4384    
4385        if (p > req_char_ptr)
4386          {
4387          /* Do a single test if no case difference is set up */
4388    
4389          if (req_char == req_char2)
4390            {
4391            while (p < end_subject)
4392              {
4393              if (*p++ == req_char) { p--; break; }
4394              }
4395            }
4396    
4397          /* Otherwise test for either case */
4398    
4399          else
4400            {
4401            while (p < end_subject)
4402              {
4403              register int pp = *p++;
4404              if (pp == req_char || pp == req_char2) { p--; break; }
4405              }
4406            }
4407    
4408          /* If we can't find the required character, break the matching loop */
4409    
4410          if (p >= end_subject) break;
4411    
4412          /* If we have found the required character, save the point where we
4413          found it, so that we don't search again next time round the loop if
4414          the start hasn't passed this character yet. */
4415    
4416          req_char_ptr = p;
4417          }
4418        }
4419    
4420    /* When a match occurs, substrings will be set for all internal extractions;    /* When a match occurs, substrings will be set for all internal extractions;
4421    we just need to set up the whole thing as substring 0 before returning. If    we just need to set up the whole thing as substring 0 before returning. If
4422    there were too many extractions, set the return code to zero. In the case    there were too many extractions, set the return code to zero. In the case
# Line 4291  do Line 4424  do
4424    those back references that we can. In this case there need not be overflow    those back references that we can. In this case there need not be overflow
4425    if certain parts of the pattern were not used. */    if certain parts of the pattern were not used. */
4426    
4427      match_block.start_match = start_match;
4428    if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))    if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match))
4429      continue;      continue;
4430    

Legend:
Removed from v.35  
changed lines
  Added in v.39

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12