| 412 |
"\\k is not followed by a braced, angle-bracketed, or quoted name\0" |
"\\k is not followed by a braced, angle-bracketed, or quoted name\0" |
| 413 |
/* 70 */ |
/* 70 */ |
| 414 |
"internal error: unknown opcode in find_fixedlength()\0" |
"internal error: unknown opcode in find_fixedlength()\0" |
| 415 |
|
"\\N is not supported in a class\0" |
| 416 |
; |
; |
| 417 |
|
|
| 418 |
/* Table to identify digits and hex digits. This is used when compiling |
/* Table to identify digits and hex digits. This is used when compiling |
| 1529 |
|
|
| 1530 |
Returns: the fixed length, |
Returns: the fixed length, |
| 1531 |
or -1 if there is no fixed length, |
or -1 if there is no fixed length, |
| 1532 |
or -2 if \C was encountered |
or -2 if \C was encountered (in UTF-8 mode only) |
| 1533 |
or -3 if an OP_RECURSE item was encountered and atend is FALSE |
or -3 if an OP_RECURSE item was encountered and atend is FALSE |
| 1534 |
or -4 if an unknown opcode was encountered (internal error) |
or -4 if an unknown opcode was encountered (internal error) |
| 1535 |
*/ |
*/ |
| 1703 |
cc++; |
cc++; |
| 1704 |
break; |
break; |
| 1705 |
|
|
| 1706 |
/* The single-byte matcher isn't allowed */ |
/* The single-byte matcher isn't allowed. This only happens in UTF-8 mode; |
| 1707 |
|
otherwise \C is coded as OP_ALLANY. */ |
| 1708 |
|
|
| 1709 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 1710 |
return -2; |
return -2; |
| 3354 |
} |
} |
| 3355 |
|
|
| 3356 |
*lengthptr += (int)(code - last_code); |
*lengthptr += (int)(code - last_code); |
| 3357 |
DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c)); |
DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code), |
| 3358 |
|
c)); |
| 3359 |
|
|
| 3360 |
/* If "previous" is set and it is not at the start of the work space, move |
/* If "previous" is set and it is not at the start of the work space, move |
| 3361 |
it back to there, in order to avoid filling up the work space. Otherwise, |
it back to there, in order to avoid filling up the work space. Otherwise, |
| 3771 |
if (*errorcodeptr != 0) goto FAILED; |
if (*errorcodeptr != 0) goto FAILED; |
| 3772 |
|
|
| 3773 |
if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ |
if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ |
| 3774 |
|
else if (-c == ESC_N) /* \N is not supported in a class */ |
| 3775 |
|
{ |
| 3776 |
|
*errorcodeptr = ERR71; |
| 3777 |
|
goto FAILED; |
| 3778 |
|
} |
| 3779 |
else if (-c == ESC_Q) /* Handle start of quoted string */ |
else if (-c == ESC_Q) /* Handle start of quoted string */ |
| 3780 |
{ |
{ |
| 3781 |
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) |
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) |
| 4432 |
past, but it no longer happens for non-repeated recursions. In fact, the |
past, but it no longer happens for non-repeated recursions. In fact, the |
| 4433 |
repeated ones could be re-implemented independently so as not to need this, |
repeated ones could be re-implemented independently so as not to need this, |
| 4434 |
but for the moment we rely on the code for repeating groups. */ |
but for the moment we rely on the code for repeating groups. */ |
| 4435 |
|
|
| 4436 |
if (*previous == OP_RECURSE) |
if (*previous == OP_RECURSE) |
| 4437 |
{ |
{ |
| 4438 |
memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); |
memmove(previous + 1 + LINK_SIZE, previous, 1 + LINK_SIZE); |
| 4990 |
ONCE brackets can be converted into non-capturing brackets, as the |
ONCE brackets can be converted into non-capturing brackets, as the |
| 4991 |
behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to |
behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to |
| 4992 |
deal with possessive ONCEs specially. |
deal with possessive ONCEs specially. |
| 4993 |
|
|
| 4994 |
Otherwise, if the quantifier was possessive, we convert the BRA code to |
Otherwise, when we are doing the actual compile phase, check to see |
| 4995 |
the POS form, and the KET code to KETRPOS. (It turns out to be convenient |
whether this group is one that could match an empty string. If so, |
| 4996 |
at runtime to detect this kind of subpattern at both the start and at the |
convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so |
| 4997 |
end.) The use of special opcodes makes it possible to reduce greatly the |
that runtime checking can be done. [This check is also applied to ONCE |
| 4998 |
stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO, |
groups at runtime, but in a different way.] |
| 4999 |
convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that |
|
| 5000 |
the default action below, of wrapping everything inside atomic brackets, |
Then, if the quantifier was possessive and the bracket is not a |
| 5001 |
does not happen. |
conditional, we convert the BRA code to the POS form, and the KET code to |
| 5002 |
|
KETRPOS. (It turns out to be convenient at runtime to detect this kind of |
| 5003 |
Then, when we are doing the actual compile phase, check to see whether |
subpattern at both the start and at the end.) The use of special opcodes |
| 5004 |
this group is one that could match an empty string. If so, convert the |
makes it possible to reduce greatly the stack usage in pcre_exec(). If |
| 5005 |
initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime |
the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. |
| 5006 |
checking can be done. [This check is also applied to ONCE groups at |
|
| 5007 |
runtime, but in a different way.] */ |
Then, if the minimum number of matches is 1 or 0, cancel the possessive |
| 5008 |
|
flag so that the default action below, of wrapping everything inside |
| 5009 |
|
atomic brackets, does not happen. When the minimum is greater than 1, |
| 5010 |
|
there will be earlier copies of the group, and so we still have to wrap |
| 5011 |
|
the whole thing. */ |
| 5012 |
|
|
| 5013 |
else |
else |
| 5014 |
{ |
{ |
| 5015 |
uschar *ketcode = code - 1 - LINK_SIZE; |
uschar *ketcode = code - 1 - LINK_SIZE; |
| 5016 |
uschar *bracode = ketcode - GET(ketcode, 1); |
uschar *bracode = ketcode - GET(ketcode, 1); |
| 5017 |
|
|
| 5018 |
|
/* Convert possessive ONCE brackets to non-capturing */ |
| 5019 |
|
|
| 5020 |
if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && |
if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && |
| 5021 |
possessive_quantifier) *bracode = OP_BRA; |
possessive_quantifier) *bracode = OP_BRA; |
| 5022 |
|
|
| 5023 |
|
/* For non-possessive ONCE brackets, all we need to do is to |
| 5024 |
|
set the KET. */ |
| 5025 |
|
|
| 5026 |
if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) |
if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) |
| 5027 |
*ketcode = OP_KETRMAX + repeat_type; |
*ketcode = OP_KETRMAX + repeat_type; |
| 5028 |
|
|
| 5029 |
|
/* Handle non-ONCE brackets and possessive ONCEs (which have been |
| 5030 |
|
converted to non-capturing above). */ |
| 5031 |
|
|
| 5032 |
else |
else |
| 5033 |
{ |
{ |
| 5034 |
if (possessive_quantifier) |
/* In the compile phase, check for empty string matching. */ |
| 5035 |
{ |
|
|
*bracode += 1; /* Switch to xxxPOS opcodes */ |
|
|
*ketcode = OP_KETRPOS; |
|
|
if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; |
|
|
possessive_quantifier = FALSE; |
|
|
} |
|
|
else *ketcode = OP_KETRMAX + repeat_type; |
|
|
|
|
| 5036 |
if (lengthptr == NULL) |
if (lengthptr == NULL) |
| 5037 |
{ |
{ |
| 5038 |
uschar *scode = bracode; |
uschar *scode = bracode; |
| 5047 |
} |
} |
| 5048 |
while (*scode == OP_ALT); |
while (*scode == OP_ALT); |
| 5049 |
} |
} |
| 5050 |
|
|
| 5051 |
|
/* Handle possessive quantifiers. */ |
| 5052 |
|
|
| 5053 |
|
if (possessive_quantifier) |
| 5054 |
|
{ |
| 5055 |
|
/* For COND brackets, we wrap the whole thing in a possessively |
| 5056 |
|
repeated non-capturing bracket, because we have not invented POS |
| 5057 |
|
versions of the COND opcodes. Because we are moving code along, we |
| 5058 |
|
must ensure that any pending recursive references are updated. */ |
| 5059 |
|
|
| 5060 |
|
if (*bracode == OP_COND || *bracode == OP_SCOND) |
| 5061 |
|
{ |
| 5062 |
|
int nlen = (int)(code - bracode); |
| 5063 |
|
*code = OP_END; |
| 5064 |
|
adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm); |
| 5065 |
|
memmove(bracode + 1+LINK_SIZE, bracode, nlen); |
| 5066 |
|
code += 1 + LINK_SIZE; |
| 5067 |
|
nlen += 1 + LINK_SIZE; |
| 5068 |
|
*bracode = OP_BRAPOS; |
| 5069 |
|
*code++ = OP_KETRPOS; |
| 5070 |
|
PUTINC(code, 0, nlen); |
| 5071 |
|
PUT(bracode, 1, nlen); |
| 5072 |
|
} |
| 5073 |
|
|
| 5074 |
|
/* For non-COND brackets, we modify the BRA code and use KETRPOS. */ |
| 5075 |
|
|
| 5076 |
|
else |
| 5077 |
|
{ |
| 5078 |
|
*bracode += 1; /* Switch to xxxPOS opcodes */ |
| 5079 |
|
*ketcode = OP_KETRPOS; |
| 5080 |
|
} |
| 5081 |
|
|
| 5082 |
|
/* If the minimum is zero, mark it as possessive, then unset the |
| 5083 |
|
possessive flag when the minimum is 0 or 1. */ |
| 5084 |
|
|
| 5085 |
|
if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; |
| 5086 |
|
if (repeat_min < 2) possessive_quantifier = FALSE; |
| 5087 |
|
} |
| 5088 |
|
|
| 5089 |
|
/* Non-possessive quantifier */ |
| 5090 |
|
|
| 5091 |
|
else *ketcode = OP_KETRMAX + repeat_type; |
| 5092 |
} |
} |
| 5093 |
} |
} |
| 5094 |
} |
} |
| 5115 |
notation is just syntactic sugar, taken from Sun's Java package, but the |
notation is just syntactic sugar, taken from Sun's Java package, but the |
| 5116 |
special opcodes can optimize it. |
special opcodes can optimize it. |
| 5117 |
|
|
| 5118 |
Possessively repeated subpatterns have already been handled in the code |
Some (but not all) possessively repeated subpatterns have already been |
| 5119 |
just above, so possessive_quantifier is always FALSE for them at this |
completely handled in the code just above. For them, possessive_quantifier |
| 5120 |
stage. |
is always FALSE at this stage. |
| 5121 |
|
|
| 5122 |
Note that the repeated item starts at tempcode, not at previous, which |
Note that the repeated item starts at tempcode, not at previous, which |
| 5123 |
might be the first part of a string whose (former) last char we repeated. |
might be the first part of a string whose (former) last char we repeated. |
| 5607 |
|
|
| 5608 |
/* ------------------------------------------------------------ */ |
/* ------------------------------------------------------------ */ |
| 5609 |
case CHAR_C: /* Callout - may be followed by digits; */ |
case CHAR_C: /* Callout - may be followed by digits; */ |
| 5610 |
previous_callout = code; /* Save for later completion */ |
previous_callout = code; /* Save for later completion */ |
| 5611 |
after_manual_callout = 1; /* Skip one item before completing */ |
after_manual_callout = 1; /* Skip one item before completing */ |
| 5612 |
*code++ = OP_CALLOUT; |
*code++ = OP_CALLOUT; |
| 5613 |
{ |
{ |
| 5614 |
int n = 0; |
int n = 0; |
| 6485 |
} |
} |
| 6486 |
else |
else |
| 6487 |
#endif |
#endif |
| 6488 |
{ |
/* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE |
| 6489 |
|
so that it works in DFA mode and in lookbehinds. */ |
| 6490 |
|
|
| 6491 |
|
{ |
| 6492 |
previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; |
previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; |
| 6493 |
*code++ = -c; |
*code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c; |
| 6494 |
} |
} |
| 6495 |
} |
} |
| 6496 |
continue; |
continue; |