| 1694 |
for (;;) |
for (;;) |
| 1695 |
{ |
{ |
| 1696 |
register int c = *code; |
register int c = *code; |
| 1697 |
|
|
| 1698 |
if (c == OP_END) return NULL; |
if (c == OP_END) return NULL; |
| 1699 |
|
|
| 1700 |
/* XCLASS is used for classes that cannot be represented just by a bit |
/* XCLASS is used for classes that cannot be represented just by a bit |
| 4208 |
ptr++; |
ptr++; |
| 4209 |
} |
} |
| 4210 |
else repeat_type = greedy_default; |
else repeat_type = greedy_default; |
| 4211 |
|
|
| 4212 |
|
/* If previous was a recursion call, wrap it in atomic brackets so that |
| 4213 |
|
previous becomes the atomic group. All recursions were so wrapped in the |
| 4214 |
|
past, but it no longer happens for non-repeated recursions. In fact, the |
| 4215 |
|
repeated ones could be re-implemented independently so as not to need this, |
| 4216 |
|
but for the moment we rely on the code for repeating groups. */ |
| 4217 |
|
|
| 4218 |
|
if (*previous == OP_RECURSE) |
| 4219 |
|
{ |
| 4220 |
|
memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); |
| 4221 |
|
*previous = OP_ONCE; |
| 4222 |
|
PUT(previous, 1, 2 + 2*LINK_SIZE); |
| 4223 |
|
previous[2 + 2*LINK_SIZE] = OP_KET; |
| 4224 |
|
PUT(previous, 3 + 2*LINK_SIZE, 2 + 2*LINK_SIZE); |
| 4225 |
|
code += 2 + 2 * LINK_SIZE; |
| 4226 |
|
length_prevgroup = 3 + 3*LINK_SIZE; |
| 4227 |
|
|
| 4228 |
|
/* When actually compiling, we need to check whether this was a forward |
| 4229 |
|
reference, and if so, adjust the offset. */ |
| 4230 |
|
|
| 4231 |
|
if (lengthptr == NULL && cd->hwm >= cd->start_workspace + LINK_SIZE) |
| 4232 |
|
{ |
| 4233 |
|
int offset = GET(cd->hwm, -LINK_SIZE); |
| 4234 |
|
if (offset == previous + 1 - cd->start_code) |
| 4235 |
|
PUT(cd->hwm, -LINK_SIZE, offset + 1 + LINK_SIZE); |
| 4236 |
|
} |
| 4237 |
|
} |
| 4238 |
|
|
| 4239 |
|
/* Now handle repetition for the different types of item. */ |
| 4240 |
|
|
| 4241 |
/* If previous was a character match, abolish the item and generate a |
/* If previous was a character match, abolish the item and generate a |
| 4242 |
repeat item instead. If a char item has a minimum of more than one, ensure |
repeat item instead. If a char item has a minimum of more than one, ensure |
| 4756 |
} |
} |
| 4757 |
|
|
| 4758 |
/* If the maximum is unlimited, set a repeater in the final copy. For |
/* If the maximum is unlimited, set a repeater in the final copy. For |
| 4759 |
ONCE brackets, that's all we need to do. |
ONCE brackets, that's all we need to do. However, possessively repeated |
| 4760 |
|
ONCE brackets can be converted into non-capturing brackets, as the |
| 4761 |
|
behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to |
| 4762 |
|
deal with possessive ONCEs specially. |
| 4763 |
|
|
| 4764 |
Otherwise, if the quantifier was possessive, we convert the BRA code to |
Otherwise, if the quantifier was possessive, we convert the BRA code to |
| 4765 |
the POS form, and the KET code to KETRPOS. (It turns out to be convenient |
the POS form, and the KET code to KETRPOS. (It turns out to be convenient |
| 4780 |
{ |
{ |
| 4781 |
uschar *ketcode = code - 1 - LINK_SIZE; |
uschar *ketcode = code - 1 - LINK_SIZE; |
| 4782 |
uschar *bracode = ketcode - GET(ketcode, 1); |
uschar *bracode = ketcode - GET(ketcode, 1); |
| 4783 |
|
|
| 4784 |
if (*bracode == OP_ONCE) |
if (*bracode == OP_ONCE && possessive_quantifier) *bracode = OP_BRA; |
| 4785 |
|
if (*bracode == OP_ONCE) |
| 4786 |
*ketcode = OP_KETRMAX + repeat_type; |
*ketcode = OP_KETRMAX + repeat_type; |
| 4787 |
else |
else |
| 4788 |
{ |
{ |
| 4965 |
if (namelen == verbs[i].len && |
if (namelen == verbs[i].len && |
| 4966 |
strncmp((char *)name, vn, namelen) == 0) |
strncmp((char *)name, vn, namelen) == 0) |
| 4967 |
{ |
{ |
| 4968 |
/* Check for open captures before ACCEPT */ |
/* Check for open captures before ACCEPT and convert it to |
| 4969 |
|
ASSERT_ACCEPT if in an assertion. */ |
| 4970 |
|
|
| 4971 |
if (verbs[i].op == OP_ACCEPT) |
if (verbs[i].op == OP_ACCEPT) |
| 4972 |
{ |
{ |
| 4973 |
open_capitem *oc; |
open_capitem *oc; |
| 4974 |
|
if (arglen != 0) |
| 4975 |
|
{ |
| 4976 |
|
*errorcodeptr = ERR59; |
| 4977 |
|
goto FAILED; |
| 4978 |
|
} |
| 4979 |
cd->had_accept = TRUE; |
cd->had_accept = TRUE; |
| 4980 |
for (oc = cd->open_caps; oc != NULL; oc = oc->next) |
for (oc = cd->open_caps; oc != NULL; oc = oc->next) |
| 4981 |
{ |
{ |
| 4982 |
*code++ = OP_CLOSE; |
*code++ = OP_CLOSE; |
| 4983 |
PUT2INC(code, 0, oc->number); |
PUT2INC(code, 0, oc->number); |
| 4984 |
} |
} |
| 4985 |
|
*code++ = (cd->assert_depth > 0)? OP_ASSERT_ACCEPT : OP_ACCEPT; |
| 4986 |
} |
} |
| 4987 |
|
|
| 4988 |
/* Handle the cases with/without an argument */ |
/* Handle other cases with/without an argument */ |
| 4989 |
|
|
| 4990 |
if (arglen == 0) |
else if (arglen == 0) |
| 4991 |
{ |
{ |
| 4992 |
if (verbs[i].op < 0) /* Argument is mandatory */ |
if (verbs[i].op < 0) /* Argument is mandatory */ |
| 4993 |
{ |
{ |
| 5276 |
/* ------------------------------------------------------------ */ |
/* ------------------------------------------------------------ */ |
| 5277 |
case CHAR_EQUALS_SIGN: /* Positive lookahead */ |
case CHAR_EQUALS_SIGN: /* Positive lookahead */ |
| 5278 |
bravalue = OP_ASSERT; |
bravalue = OP_ASSERT; |
| 5279 |
|
cd->assert_depth += 1; |
| 5280 |
ptr++; |
ptr++; |
| 5281 |
break; |
break; |
| 5282 |
|
|
| 5291 |
continue; |
continue; |
| 5292 |
} |
} |
| 5293 |
bravalue = OP_ASSERT_NOT; |
bravalue = OP_ASSERT_NOT; |
| 5294 |
|
cd->assert_depth += 1; |
| 5295 |
break; |
break; |
| 5296 |
|
|
| 5297 |
|
|
| 5301 |
{ |
{ |
| 5302 |
case CHAR_EQUALS_SIGN: /* Positive lookbehind */ |
case CHAR_EQUALS_SIGN: /* Positive lookbehind */ |
| 5303 |
bravalue = OP_ASSERTBACK; |
bravalue = OP_ASSERTBACK; |
| 5304 |
|
cd->assert_depth += 1; |
| 5305 |
ptr += 2; |
ptr += 2; |
| 5306 |
break; |
break; |
| 5307 |
|
|
| 5308 |
case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */ |
case CHAR_EXCLAMATION_MARK: /* Negative lookbehind */ |
| 5309 |
bravalue = OP_ASSERTBACK_NOT; |
bravalue = OP_ASSERTBACK_NOT; |
| 5310 |
|
cd->assert_depth += 1; |
| 5311 |
ptr += 2; |
ptr += 2; |
| 5312 |
break; |
break; |
| 5313 |
|
|
| 5698 |
|
|
| 5699 |
/* Fudge the value of "called" so that when it is inserted as an |
/* Fudge the value of "called" so that when it is inserted as an |
| 5700 |
offset below, what is actually inserted is the reference number |
offset below, what is actually inserted is the reference number |
| 5701 |
of the group. */ |
of the group. Then remember the forward reference. */ |
| 5702 |
|
|
| 5703 |
called = cd->start_code + recno; |
called = cd->start_code + recno; |
| 5704 |
PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code)); |
PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code)); |
| 5705 |
} |
} |
| 5706 |
|
|
| 5707 |
/* If not a forward reference, and the subpattern is still open, |
/* If not a forward reference, and the subpattern is still open, |
| 5716 |
} |
} |
| 5717 |
} |
} |
| 5718 |
|
|
| 5719 |
/* Insert the recursion/subroutine item, automatically wrapped inside |
/* Insert the recursion/subroutine item. */ |
| 5720 |
"once" brackets. Set up a "previous group" length so that a |
|
|
subsequent quantifier will work. */ |
|
|
|
|
|
*code = OP_ONCE; |
|
|
PUT(code, 1, 2 + 2*LINK_SIZE); |
|
|
code += 1 + LINK_SIZE; |
|
|
|
|
| 5721 |
*code = OP_RECURSE; |
*code = OP_RECURSE; |
| 5722 |
PUT(code, 1, (int)(called - cd->start_code)); |
PUT(code, 1, (int)(called - cd->start_code)); |
| 5723 |
code += 1 + LINK_SIZE; |
code += 1 + LINK_SIZE; |
|
|
|
|
*code = OP_KET; |
|
|
PUT(code, 1, 2 + 2*LINK_SIZE); |
|
|
code += 1 + LINK_SIZE; |
|
|
|
|
|
length_prevgroup = 3 + 3*LINK_SIZE; |
|
| 5724 |
} |
} |
| 5725 |
|
|
| 5726 |
/* Can't determine a first byte now */ |
/* Can't determine a first byte now */ |
| 5863 |
&length_prevgroup /* Pre-compile phase */ |
&length_prevgroup /* Pre-compile phase */ |
| 5864 |
)) |
)) |
| 5865 |
goto FAILED; |
goto FAILED; |
| 5866 |
|
|
| 5867 |
|
if (bravalue >= OP_ASSERT && bravalue <= OP_ASSERTBACK_NOT) |
| 5868 |
|
cd->assert_depth -= 1; |
| 5869 |
|
|
| 5870 |
/* At the end of compiling, code is still pointing to the start of the |
/* At the end of compiling, code is still pointing to the start of the |
| 5871 |
group, while tempcode has been updated to point past the end of the group |
group, while tempcode has been updated to point past the end of the group |
| 7012 |
|
|
| 7013 |
/* Can't support UTF8 unless PCRE has been compiled to include the code. The |
/* Can't support UTF8 unless PCRE has been compiled to include the code. The |
| 7014 |
return of an error code from _pcre_valid_utf8() is a new feature, introduced in |
return of an error code from _pcre_valid_utf8() is a new feature, introduced in |
| 7015 |
release 8.13. The only use we make of it here is to adjust the offset value to |
release 8.13. It is passed back from pcre_[dfa_]exec(), but at the moment is |
| 7016 |
the end of the string for a short string error, for compatibility with previous |
not used here. */ |
|
versions. */ |
|
| 7017 |
|
|
| 7018 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 7019 |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && |
| 7020 |
(*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1, &errorcode)) >= 0) |
(errorcode = _pcre_valid_utf8((USPTR)pattern, -1, erroroffset)) != 0) |
| 7021 |
{ |
{ |
| 7022 |
errorcode = ERR44; |
errorcode = ERR44; |
| 7023 |
goto PCRE_EARLY_ERROR_RETURN2; |
goto PCRE_EARLY_ERROR_RETURN2; |
| 7188 |
*/ |
*/ |
| 7189 |
|
|
| 7190 |
cd->final_bracount = cd->bracount; /* Save for checking forward references */ |
cd->final_bracount = cd->bracount; /* Save for checking forward references */ |
| 7191 |
|
cd->assert_depth = 0; |
| 7192 |
cd->bracount = 0; |
cd->bracount = 0; |
| 7193 |
cd->names_found = 0; |
cd->names_found = 0; |
| 7194 |
cd->name_table = (uschar *)re + re->name_table_offset; |
cd->name_table = (uschar *)re + re->name_table_offset; |