/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 605 by ph10, Fri Jun 3 18:18:30 2011 UTC revision 621 by ph10, Mon Jul 18 10:14:09 2011 UTC
# Line 1694  _pcre_find_bracket(const uschar *code, B Line 1694  _pcre_find_bracket(const uschar *code, B
1694  for (;;)  for (;;)
1695    {    {
1696    register int c = *code;    register int c = *code;
1697    
1698    if (c == OP_END) return NULL;    if (c == OP_END) return NULL;
1699    
1700    /* XCLASS is used for classes that cannot be represented just by a bit    /* XCLASS is used for classes that cannot be represented just by a bit
# Line 4207  for (;; ptr++) Line 4208  for (;; ptr++)
4208        ptr++;        ptr++;
4209        }        }
4210      else repeat_type = greedy_default;      else repeat_type = greedy_default;
4211    
4212        /* If previous was a recursion call, wrap it in atomic brackets so that
4213        previous becomes the atomic group. All recursions were so wrapped in the
4214        past, but it no longer happens for non-repeated recursions. In fact, the
4215        repeated ones could be re-implemented independently so as not to need this,
4216        but for the moment we rely on the code for repeating groups. */
4217    
4218        if (*previous == OP_RECURSE)
4219          {
4220          memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE);
4221          *previous = OP_ONCE;
4222          PUT(previous, 1, 2 + 2*LINK_SIZE);
4223          previous[2 + 2*LINK_SIZE] = OP_KET;
4224          PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE);
4225          code += 2 + 2 * LINK_SIZE;
4226          length_prevgroup = 3 + 3*LINK_SIZE;
4227    
4228          /* When actually compiling, we need to check whether this was a forward
4229          reference, and if so, adjust the offset. */
4230    
4231          if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE)
4232            {
4233            int offset = GET(cd->hwm, -LINK_SIZE);
4234            if (offset == previous + 1 - cd->start_code)
4235              PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE);
4236            }
4237          }
4238    
4239        /* Now handle repetition for the different types of item. */
4240    
4241      /* If previous was a character match, abolish the item and generate a      /* If previous was a character match, abolish the item and generate a
4242      repeat item instead. If a char item has a minimum of more than one, ensure      repeat item instead. If a char item has a minimum of more than one, ensure
# Line 4726  for (;; ptr++) Line 4756  for (;; ptr++)
4756          }          }
4757    
4758        /* If the maximum is unlimited, set a repeater in the final copy. For        /* If the maximum is unlimited, set a repeater in the final copy. For
4759        ONCE brackets, that's all we need to do.        ONCE brackets, that's all we need to do. However, possessively repeated
4760          ONCE brackets can be converted into non-capturing brackets, as the
4761          behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to
4762          deal with possessive ONCEs specially.
4763    
4764        Otherwise, if the quantifier was possessive, we convert the BRA code to        Otherwise, if the quantifier was possessive, we convert the BRA code to
4765        the POS form, and the KET code to KETRPOS. (It turns out to be convenient        the POS form, and the KET code to KETRPOS. (It turns out to be convenient
# Line 4747  for (;; ptr++) Line 4780  for (;; ptr++)
4780          {          {
4781          uschar *ketcode = code - 1 - LINK_SIZE;          uschar *ketcode = code - 1 - LINK_SIZE;
4782          uschar *bracode = ketcode - GET(ketcode, 1);          uschar *bracode = ketcode - GET(ketcode, 1);
4783    
4784          if (*bracode == OP_ONCE)          if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA;
4785            if (*bracode == OP_ONCE)
4786            *ketcode = OP_KETRMAX + repeat_type;            *ketcode = OP_KETRMAX + repeat_type;
4787          else          else
4788            {            {
# Line 4931  for (;; ptr++) Line 4965  for (;; ptr++)
4965          if (namelen == verbs[i].len &&          if (namelen == verbs[i].len &&
4966              strncmp((char *)name, vn, namelen) == 0)              strncmp((char *)name, vn, namelen) == 0)
4967            {            {
4968            /* Check for open captures before ACCEPT */            /* Check for open captures before ACCEPT and convert it to
4969              ASSERT_ACCEPT if in an assertion. */
4970    
4971            if (verbs[i].op == OP_ACCEPT)            if (verbs[i].op == OP_ACCEPT)
4972              {              {
4973              open_capitem *oc;              open_capitem *oc;
4974                if (arglen != 0)
4975                  {
4976                  *errorcodeptr = ERR59;
4977                  goto FAILED;
4978                  }
4979              cd->had_accept = TRUE;              cd->had_accept = TRUE;
4980              for (oc = cd->open_caps; oc != NULL; oc = oc->next)              for (oc = cd->open_caps; oc != NULL; oc = oc->next)
4981                {                {
4982                *code++ = OP_CLOSE;                *code++ = OP_CLOSE;
4983                PUT2INC(code, 0, oc->number);                PUT2INC(code, 0, oc->number);
4984                }                }
4985                *code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT;
4986              }              }
4987    
4988            /* Handle the cases with/without an argument */            /* Handle other cases with/without an argument */
4989    
4990            if (arglen == 0)            else if (arglen == 0)
4991              {              {
4992              if (verbs[i].op < 0)   /* Argument is mandatory */              if (verbs[i].op < 0)   /* Argument is mandatory */
4993                {                {
# Line 5235  for (;; ptr++) Line 5276  for (;; ptr++)
5276          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5277          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */          case CHAR_EQUALS_SIGN:                 /* Positive lookahead */
5278          bravalue = OP_ASSERT;          bravalue = OP_ASSERT;
5279            cd->assert_depth += 1;
5280          ptr++;          ptr++;
5281          break;          break;
5282    
# Line 5249  for (;; ptr++) Line 5291  for (;; ptr++)
5291            continue;            continue;
5292            }            }
5293          bravalue = OP_ASSERT_NOT;          bravalue = OP_ASSERT_NOT;
5294            cd->assert_depth += 1;
5295          break;          break;
5296    
5297    
# Line 5258  for (;; ptr++) Line 5301  for (;; ptr++)
5301            {            {
5302            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */            case CHAR_EQUALS_SIGN:               /* Positive lookbehind */
5303            bravalue = OP_ASSERTBACK;            bravalue = OP_ASSERTBACK;
5304              cd->assert_depth += 1;
5305            ptr += 2;            ptr += 2;
5306            break;            break;
5307    
5308            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */            case CHAR_EXCLAMATION_MARK:          /* Negative lookbehind */
5309            bravalue = OP_ASSERTBACK_NOT;            bravalue = OP_ASSERTBACK_NOT;
5310              cd->assert_depth += 1;
5311            ptr += 2;            ptr += 2;
5312            break;            break;
5313    
# Line 5653  for (;; ptr++) Line 5698  for (;; ptr++)
5698    
5699                /* Fudge the value of "called" so that when it is inserted as an                /* Fudge the value of "called" so that when it is inserted as an
5700                offset below, what is actually inserted is the reference number                offset below, what is actually inserted is the reference number
5701                of the group. */                of the group. Then remember the forward reference. */
5702    
5703                called = cd->start_code + recno;                called = cd->start_code + recno;
5704                PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code));                PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));
5705                }                }
5706    
5707              /* If not a forward reference, and the subpattern is still open,              /* If not a forward reference, and the subpattern is still open,
# Line 5671  for (;; ptr++) Line 5716  for (;; ptr++)
5716                }                }
5717              }              }
5718    
5719            /* Insert the recursion/subroutine item, automatically wrapped inside            /* Insert the recursion/subroutine item. */
5720            "once" brackets. Set up a "previous group" length so that a  
           subsequent quantifier will work. */  
   
           *code = OP_ONCE;  
           PUT(code, 1, 2 + 2*LINK_SIZE);  
           code += 1 + LINK_SIZE;  
   
5721            *code = OP_RECURSE;            *code = OP_RECURSE;
5722            PUT(code, 1, (int)(called - cd->start_code));            PUT(code, 1, (int)(called - cd->start_code));
5723            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
   
           *code = OP_KET;  
           PUT(code, 1, 2 + 2*LINK_SIZE);  
           code += 1 + LINK_SIZE;  
   
           length_prevgroup = 3 + 3*LINK_SIZE;  
5724            }            }
5725    
5726          /* Can't determine a first byte now */          /* Can't determine a first byte now */
# Line 5830  for (;; ptr++) Line 5863  for (;; ptr++)
5863             &length_prevgroup           /* Pre-compile phase */             &length_prevgroup           /* Pre-compile phase */
5864           ))           ))
5865        goto FAILED;        goto FAILED;
5866    
5867        if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT)
5868          cd->assert_depth -= 1;
5869    
5870      /* At the end of compiling, code is still pointing to the start of the      /* At the end of compiling, code is still pointing to the start of the
5871      group, while tempcode has been updated to point past the end of the group      group, while tempcode has been updated to point past the end of the group
# Line 6976  utf8 = (options & PCRE_UTF8) != 0; Line 7012  utf8 = (options & PCRE_UTF8) != 0;
7012    
7013  /* Can't support UTF8 unless PCRE has been compiled to include the code. The  /* Can't support UTF8 unless PCRE has been compiled to include the code. The
7014  return of an error code from _pcre_valid_utf8() is a new feature, introduced in  return of an error code from _pcre_valid_utf8() is a new feature, introduced in
7015  release 8.13. The only use we make of it here is to adjust the offset value to  release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is
7016  the end of the string for a short string error, for compatibility with previous  not used here. */
 versions. */  
7017    
7018  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
7019  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
7020       (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1, &errorcode)) >= 0)       (errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0)
7021    {    {
7022    errorcode = ERR44;    errorcode = ERR44;
7023    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 7153  field; this time it's used for rememberi Line 7188  field; this time it's used for rememberi
7188  */  */
7189    
7190  cd->final_bracount = cd->bracount;  /* Save for checking forward references */  cd->final_bracount = cd->bracount;  /* Save for checking forward references */
7191    cd->assert_depth = 0;
7192  cd->bracount = 0;  cd->bracount = 0;
7193  cd->names_found = 0;  cd->names_found = 0;
7194  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (uschar *)re + re->name_table_offset;

Legend:
Removed from v.605  
changed lines
  Added in v.621

webmaster@exim.org
ViewVC Help
Powered by ViewVC 1.1.12