| 1597 |
case OP_TYPEEXACT: |
case OP_TYPEEXACT: |
| 1598 |
return FALSE; |
return FALSE; |
| 1599 |
|
|
| 1600 |
|
/* These are going to continue, as they may be empty, but we have to |
| 1601 |
|
fudge the length for the \p and \P cases. */ |
| 1602 |
|
|
| 1603 |
|
case OP_TYPESTAR: |
| 1604 |
|
case OP_TYPEMINSTAR: |
| 1605 |
|
case OP_TYPEPOSSTAR: |
| 1606 |
|
case OP_TYPEQUERY: |
| 1607 |
|
case OP_TYPEMINQUERY: |
| 1608 |
|
case OP_TYPEPOSQUERY: |
| 1609 |
|
if (code[1] == OP_PROP || code[1] == OP_NOTPROP) code += 2; |
| 1610 |
|
break; |
| 1611 |
|
|
| 1612 |
|
/* Same for these */ |
| 1613 |
|
|
| 1614 |
|
case OP_TYPEUPTO: |
| 1615 |
|
case OP_TYPEMINUPTO: |
| 1616 |
|
case OP_TYPEPOSUPTO: |
| 1617 |
|
if (code[3] == OP_PROP || code[3] == OP_NOTPROP) code += 2; |
| 1618 |
|
break; |
| 1619 |
|
|
| 1620 |
/* End of branch */ |
/* End of branch */ |
| 1621 |
|
|
| 1622 |
case OP_KET: |
case OP_KET: |
| 1779 |
uschar *save_hwm) |
uschar *save_hwm) |
| 1780 |
{ |
{ |
| 1781 |
uschar *ptr = group; |
uschar *ptr = group; |
| 1782 |
|
|
| 1783 |
while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL) |
while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL) |
| 1784 |
{ |
{ |
| 1785 |
int offset; |
int offset; |
| 3196 |
goto FAILED; |
goto FAILED; |
| 3197 |
} |
} |
| 3198 |
|
|
| 3199 |
|
/* Remember whether \r or \n are in this class */ |
| 3200 |
|
|
| 3201 |
|
if (negate_class) |
| 3202 |
|
{ |
| 3203 |
|
if ((classbits[1] & 0x24) != 0x24) cd->external_options |= PCRE_HASCRORLF; |
| 3204 |
|
} |
| 3205 |
|
else |
| 3206 |
|
{ |
| 3207 |
|
if ((classbits[1] & 0x24) != 0) cd->external_options |= PCRE_HASCRORLF; |
| 3208 |
|
} |
| 3209 |
|
|
| 3210 |
/* If class_charcount is 1, we saw precisely one character whose value is |
/* If class_charcount is 1, we saw precisely one character whose value is |
| 3211 |
less than 256. As long as there were no characters >= 128 and there was no |
less than 256. As long as there were no characters >= 128 and there was no |
| 3212 |
use of \p or \P, in other words, no use of any XCLASS features, we can |
use of \p or \P, in other words, no use of any XCLASS features, we can |
| 3213 |
optimize. |
optimize. |
| 3214 |
|
|
| 3215 |
In UTF-8 mode, we can optimize the negative case only if there were no |
In UTF-8 mode, we can optimize the negative case only if there were no |
| 3216 |
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR |
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR |
| 3217 |
operate on single-bytes only. This is an historical hangover. Maybe one day |
operate on single-bytes only. This is an historical hangover. Maybe one day |
| 3225 |
reqbyte, save the previous value for reinstating. */ |
reqbyte, save the previous value for reinstating. */ |
| 3226 |
|
|
| 3227 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 3228 |
if (class_charcount == 1 && !class_utf8 && |
if (class_charcount == 1 && !class_utf8 && |
| 3229 |
(!utf8 || !negate_class || class_lastchar < 128)) |
(!utf8 || !negate_class || class_lastchar < 128)) |
| 3230 |
#else |
#else |
| 3231 |
if (class_charcount == 1) |
if (class_charcount == 1) |
| 5062 |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR; |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR; |
| 5063 |
for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; |
for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; |
| 5064 |
|
|
| 5065 |
|
/* Remember if \r or \n were seen */ |
| 5066 |
|
|
| 5067 |
|
if (mcbuffer[0] == '\r' || mcbuffer[0] == '\n') |
| 5068 |
|
cd->external_options |= PCRE_HASCRORLF; |
| 5069 |
|
|
| 5070 |
/* Set the first and required bytes appropriately. If no previous first |
/* Set the first and required bytes appropriately. If no previous first |
| 5071 |
byte, set it from this character, but revert to none on a zero repeat. |
byte, set it from this character, but revert to none on a zero repeat. |
| 5072 |
Otherwise, leave the firstbyte value alone, and don't change it on a zero |
Otherwise, leave the firstbyte value alone, and don't change it on a zero |
| 5666 |
int length = 1; /* For final END opcode */ |
int length = 1; /* For final END opcode */ |
| 5667 |
int firstbyte, reqbyte, newline; |
int firstbyte, reqbyte, newline; |
| 5668 |
int errorcode = 0; |
int errorcode = 0; |
| 5669 |
|
int skipatstart = 0; |
| 5670 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 5671 |
BOOL utf8; |
BOOL utf8; |
| 5672 |
#endif |
#endif |
| 5745 |
cd->cbits = tables + cbits_offset; |
cd->cbits = tables + cbits_offset; |
| 5746 |
cd->ctypes = tables + ctypes_offset; |
cd->ctypes = tables + ctypes_offset; |
| 5747 |
|
|
| 5748 |
|
/* Check for newline settings at the start of the pattern, and remember the |
| 5749 |
|
offset for later. */ |
| 5750 |
|
|
| 5751 |
|
if (ptr[0] == '(' && ptr[1] == '*') |
| 5752 |
|
{ |
| 5753 |
|
int newnl = 0; |
| 5754 |
|
if (strncmp((char *)(ptr+2), "CR)", 3) == 0) |
| 5755 |
|
{ skipatstart = 5; newnl = PCRE_NEWLINE_CR; } |
| 5756 |
|
else if (strncmp((char *)(ptr+2), "LF)", 3) == 0) |
| 5757 |
|
{ skipatstart = 5; newnl = PCRE_NEWLINE_LF; } |
| 5758 |
|
else if (strncmp((char *)(ptr+2), "CRLF)", 5) == 0) |
| 5759 |
|
{ skipatstart = 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; } |
| 5760 |
|
else if (strncmp((char *)(ptr+2), "ANY)", 4) == 0) |
| 5761 |
|
{ skipatstart = 6; newnl = PCRE_NEWLINE_ANY; } |
| 5762 |
|
else if (strncmp((char *)(ptr+2), "ANYCRLF)", 8) == 0) |
| 5763 |
|
{ skipatstart = 10; newnl = PCRE_NEWLINE_ANYCRLF; } |
| 5764 |
|
if (skipatstart > 0) |
| 5765 |
|
options = (options & ~PCRE_NEWLINE_BITS) | newnl; |
| 5766 |
|
} |
| 5767 |
|
|
| 5768 |
/* Handle different types of newline. The three bits give seven cases. The |
/* Handle different types of newline. The three bits give seven cases. The |
| 5769 |
current code allows for fixed one- or two-byte sequences, plus "any" and |
current code allows for fixed one- or two-byte sequences, plus "any" and |
| 5770 |
"anycrlf". */ |
"anycrlf". */ |
| 5771 |
|
|
| 5772 |
switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY)) |
switch (options & PCRE_NEWLINE_BITS) |
| 5773 |
{ |
{ |
| 5774 |
case 0: newline = NEWLINE; break; /* Compile-time default */ |
case 0: newline = NEWLINE; break; /* Build-time default */ |
| 5775 |
case PCRE_NEWLINE_CR: newline = '\r'; break; |
case PCRE_NEWLINE_CR: newline = '\r'; break; |
| 5776 |
case PCRE_NEWLINE_LF: newline = '\n'; break; |
case PCRE_NEWLINE_LF: newline = '\n'; break; |
| 5777 |
case PCRE_NEWLINE_CR+ |
case PCRE_NEWLINE_CR+ |
| 5843 |
found within the regex right at the beginning. Bringing initial option settings |
found within the regex right at the beginning. Bringing initial option settings |
| 5844 |
outside can help speed up starting point checks. */ |
outside can help speed up starting point checks. */ |
| 5845 |
|
|
| 5846 |
|
ptr += skipatstart; |
| 5847 |
code = cworkspace; |
code = cworkspace; |
| 5848 |
*code = OP_BRA; |
*code = OP_BRA; |
| 5849 |
(void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS, |
(void)compile_regex(cd->external_options, cd->external_options & PCRE_IMS, |
| 5913 |
error, errorcode will be set non-zero, so we don't need to look at the result |
error, errorcode will be set non-zero, so we don't need to look at the result |
| 5914 |
of the function here. */ |
of the function here. */ |
| 5915 |
|
|
| 5916 |
ptr = (const uschar *)pattern; |
ptr = (const uschar *)pattern + skipatstart; |
| 5917 |
code = (uschar *)codestart; |
code = (uschar *)codestart; |
| 5918 |
*code = OP_BRA; |
*code = OP_BRA; |
| 5919 |
(void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr, |
(void)compile_regex(re->options, re->options & PCRE_IMS, &code, &ptr, |
| 6021 |
printf("Length = %d top_bracket = %d top_backref = %d\n", |
printf("Length = %d top_bracket = %d top_backref = %d\n", |
| 6022 |
length, re->top_bracket, re->top_backref); |
length, re->top_bracket, re->top_backref); |
| 6023 |
|
|
| 6024 |
if (re->options != 0) |
printf("Options=%08x\n", re->options); |
|
{ |
|
|
printf("%s%s%s%s%s%s%s%s%s\n", |
|
|
((re->options & PCRE_NOPARTIAL) != 0)? "nopartial " : "", |
|
|
((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", |
|
|
((re->options & PCRE_CASELESS) != 0)? "caseless " : "", |
|
|
((re->options & PCRE_EXTENDED) != 0)? "extended " : "", |
|
|
((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", |
|
|
((re->options & PCRE_DOTALL) != 0)? "dotall " : "", |
|
|
((re->options & PCRE_DOLLAR_ENDONLY) != 0)? "endonly " : "", |
|
|
((re->options & PCRE_EXTRA) != 0)? "extra " : "", |
|
|
((re->options & PCRE_UNGREEDY) != 0)? "ungreedy " : ""); |
|
|
} |
|
| 6025 |
|
|
| 6026 |
if ((re->options & PCRE_FIRSTSET) != 0) |
if ((re->options & PCRE_FIRSTSET) != 0) |
| 6027 |
{ |
{ |