| 68 |
#define BRASTACK_SIZE 200 |
#define BRASTACK_SIZE 200 |
| 69 |
|
|
| 70 |
|
|
|
|
|
| 71 |
/* Maximum number of ints of offset to save on the stack for recursive calls. |
/* Maximum number of ints of offset to save on the stack for recursive calls. |
| 72 |
If the offset vector is bigger, malloc is used. This should be a multiple of 3, |
If the offset vector is bigger, malloc is used. This should be a multiple of 3, |
| 73 |
because the offset vector is always a multiple of 3 long. */ |
because the offset vector is always a multiple of 3 long. */ |
| 83 |
#define MAXLIT 250 |
#define MAXLIT 250 |
| 84 |
|
|
| 85 |
|
|
| 86 |
|
/* The maximum remaining length of subject we are prepared to search for a |
| 87 |
|
req_byte match. */ |
| 88 |
|
|
| 89 |
|
#define REQ_BYTE_MAX 1000 |
| 90 |
|
|
| 91 |
|
|
| 92 |
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
| 93 |
the definition is next to the definition of the opcodes in internal.h. */ |
the definition is next to the definition of the opcodes in internal.h. */ |
| 94 |
|
|
| 1143 |
static const uschar * |
static const uschar * |
| 1144 |
find_bracket(const uschar *code, BOOL utf8, int number) |
find_bracket(const uschar *code, BOOL utf8, int number) |
| 1145 |
{ |
{ |
| 1146 |
|
#ifndef SUPPORT_UTF8 |
| 1147 |
|
utf8 = utf8; /* Stop pedantic compilers complaining */ |
| 1148 |
|
#endif |
| 1149 |
|
|
| 1150 |
for (;;) |
for (;;) |
| 1151 |
{ |
{ |
| 1152 |
register int c = *code; |
register int c = *code; |
| 1462 |
int greedy_default, greedy_non_default; |
int greedy_default, greedy_non_default; |
| 1463 |
int firstbyte, reqbyte; |
int firstbyte, reqbyte; |
| 1464 |
int zeroreqbyte, zerofirstbyte; |
int zeroreqbyte, zerofirstbyte; |
| 1465 |
int req_caseopt; |
int req_caseopt, reqvary, tempreqvary; |
| 1466 |
int condcount = 0; |
int condcount = 0; |
| 1467 |
int options = *optionsptr; |
int options = *optionsptr; |
| 1468 |
register int c; |
register int c; |
| 1708 |
posix_class *= 3; |
posix_class *= 3; |
| 1709 |
for (i = 0; i < 3; i++) |
for (i = 0; i < 3; i++) |
| 1710 |
{ |
{ |
| 1711 |
BOOL isblank = strncmp(ptr, "blank", 5) == 0; |
BOOL isblank = strncmp((char *)ptr, "blank", 5) == 0; |
| 1712 |
int taboffset = posix_class_maps[posix_class + i]; |
int taboffset = posix_class_maps[posix_class + i]; |
| 1713 |
if (taboffset < 0) break; |
if (taboffset < 0) break; |
| 1714 |
if (local_negate) |
if (local_negate) |
| 1958 |
else |
else |
| 1959 |
{ |
{ |
| 1960 |
zerofirstbyte = firstbyte; |
zerofirstbyte = firstbyte; |
| 1961 |
reqbyte = class_lastchar | req_caseopt; |
reqbyte = class_lastchar | req_caseopt | cd->req_varyopt; |
| 1962 |
} |
} |
| 1963 |
*code++ = OP_CHARS; |
*code++ = OP_CHARS; |
| 1964 |
*code++ = 1; |
*code++ = 1; |
| 2062 |
|
|
| 2063 |
if (repeat_min == 0) |
if (repeat_min == 0) |
| 2064 |
{ |
{ |
| 2065 |
firstbyte = zerofirstbyte; /* Adjust for zero repeat */ |
firstbyte = zerofirstbyte; /* Adjust for zero repeat */ |
| 2066 |
reqbyte = zeroreqbyte; /* Ditto */ |
reqbyte = zeroreqbyte; /* Ditto */ |
| 2067 |
} |
} |
| 2068 |
|
|
| 2069 |
|
/* Remember whether this is a variable length repeat */ |
| 2070 |
|
|
| 2071 |
|
reqvary = (repeat_min == repeat_max)? 0 : REQ_VARY; |
| 2072 |
|
|
| 2073 |
op_type = 0; /* Default single-char op codes */ |
op_type = 0; /* Default single-char op codes */ |
| 2074 |
possessive_quantifier = FALSE; /* Default not possessive quantifier */ |
possessive_quantifier = FALSE; /* Default not possessive quantifier */ |
| 2075 |
|
|
| 2155 |
if (code == previous + 2) /* There was only one character */ |
if (code == previous + 2) /* There was only one character */ |
| 2156 |
{ |
{ |
| 2157 |
code = previous; /* Abolish the previous item */ |
code = previous; /* Abolish the previous item */ |
| 2158 |
if (repeat_min > 1) reqbyte = c | req_caseopt; |
if (repeat_min > 1) reqbyte = c | req_caseopt | cd->req_varyopt; |
| 2159 |
} |
} |
| 2160 |
else |
else |
| 2161 |
{ |
{ |
| 2514 |
PUT(tempcode, 1, len); |
PUT(tempcode, 1, len); |
| 2515 |
} |
} |
| 2516 |
|
|
| 2517 |
/* In all cases we no longer have a previous item. */ |
/* In all cases we no longer have a previous item. We also set the |
| 2518 |
|
"follows varying string" flag for subsequently encountered reqbytes if |
| 2519 |
|
it isn't already set and we have just passed a varying length item. */ |
| 2520 |
|
|
| 2521 |
END_REPEAT: |
END_REPEAT: |
| 2522 |
previous = NULL; |
previous = NULL; |
| 2523 |
|
cd->req_varyopt |= reqvary; |
| 2524 |
break; |
break; |
| 2525 |
|
|
| 2526 |
|
|
| 2569 |
|
|
| 2570 |
else if ((cd->ctypes[ptr[1]] & ctype_digit) != 0) |
else if ((cd->ctypes[ptr[1]] & ctype_digit) != 0) |
| 2571 |
{ |
{ |
| 2572 |
int condref = *(++ptr) - '0'; |
int condref; /* Don't amalgamate; some compilers */ |
| 2573 |
|
condref = *(++ptr) - '0'; /* grumble at autoincrement in declaration */ |
| 2574 |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
| 2575 |
if (condref == 0) |
if (condref == 0) |
| 2576 |
{ |
{ |
| 2636 |
if (*(++ptr) == '<') /* Definition */ |
if (*(++ptr) == '<') /* Definition */ |
| 2637 |
{ |
{ |
| 2638 |
int i, namelen; |
int i, namelen; |
|
const uschar *name = ++ptr; |
|
| 2639 |
uschar *slot = cd->name_table; |
uschar *slot = cd->name_table; |
| 2640 |
|
const uschar *name; /* Don't amalgamate; some compilers */ |
| 2641 |
|
name = ++ptr; /* grumble at autoincrement in declaration */ |
| 2642 |
|
|
| 2643 |
while (*ptr++ != '>'); |
while (*ptr++ != '>'); |
| 2644 |
namelen = ptr - name - 1; |
namelen = ptr - name - 1; |
| 2645 |
|
|
| 2646 |
for (i = 0; i < cd->names_found; i++) |
for (i = 0; i < cd->names_found; i++) |
| 2647 |
{ |
{ |
| 2648 |
int c = memcmp(name, slot+2, namelen + 1); |
int c = memcmp(name, slot+2, namelen); |
| 2649 |
if (c == 0) |
if (c == 0) |
| 2650 |
{ |
{ |
| 2651 |
*errorptr = ERR43; |
if (slot[2+namelen] == 0) |
| 2652 |
goto FAILED; |
{ |
| 2653 |
|
*errorptr = ERR43; |
| 2654 |
|
goto FAILED; |
| 2655 |
|
} |
| 2656 |
|
c = -1; /* Current name is substring */ |
| 2657 |
} |
} |
| 2658 |
if (c < 0) |
if (c < 0) |
| 2659 |
{ |
{ |
| 2683 |
|
|
| 2684 |
for (i = 0; i < cd->names_found; i++) |
for (i = 0; i < cd->names_found; i++) |
| 2685 |
{ |
{ |
| 2686 |
if (strncmp(name, slot+2, namelen) == 0) break; |
if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break; |
| 2687 |
slot += cd->name_entry_size; |
slot += cd->name_entry_size; |
| 2688 |
} |
} |
| 2689 |
if (i >= cd->names_found) |
if (i >= cd->names_found) |
| 2861 |
previous = (bravalue >= OP_ONCE)? code : NULL; |
previous = (bravalue >= OP_ONCE)? code : NULL; |
| 2862 |
*code = bravalue; |
*code = bravalue; |
| 2863 |
tempcode = code; |
tempcode = code; |
| 2864 |
|
tempreqvary = cd->req_varyopt; /* Save value before bracket */ |
| 2865 |
|
|
| 2866 |
if (!compile_regex( |
if (!compile_regex( |
| 2867 |
newoptions, /* The complete new option state */ |
newoptions, /* The complete new option state */ |
| 2940 |
} |
} |
| 2941 |
|
|
| 2942 |
/* If firstbyte was previously set, convert the subpattern's firstbyte |
/* If firstbyte was previously set, convert the subpattern's firstbyte |
| 2943 |
into reqbyte if there wasn't one. */ |
into reqbyte if there wasn't one, using the vary flag that was in |
| 2944 |
|
existence beforehand. */ |
| 2945 |
|
|
| 2946 |
else if (subfirstbyte >= 0 && subreqbyte < 0) subreqbyte = subfirstbyte; |
else if (subfirstbyte >= 0 && subreqbyte < 0) |
| 2947 |
|
subreqbyte = subfirstbyte | tempreqvary; |
| 2948 |
|
|
| 2949 |
/* If the subpattern set a required char (or set a first char that isn't |
/* If the subpattern set a required byte (or set a first byte that isn't |
| 2950 |
really the first char - see above), set it. */ |
really the first byte - see above), set it. */ |
| 2951 |
|
|
| 2952 |
if (subreqbyte >= 0) reqbyte = subreqbyte; |
if (subreqbyte >= 0) reqbyte = subreqbyte; |
| 2953 |
} |
} |
| 3165 |
if (firstbyte == REQ_UNSET) |
if (firstbyte == REQ_UNSET) |
| 3166 |
{ |
{ |
| 3167 |
zerofirstbyte = firstbyte = previous[2] | req_caseopt; |
zerofirstbyte = firstbyte = previous[2] | req_caseopt; |
| 3168 |
zeroreqbyte = (t - 1 == previous + 2)? reqbyte : t[-1] | req_caseopt; |
zeroreqbyte = (t - 1 == previous + 2)? |
| 3169 |
|
reqbyte : t[-1] | req_caseopt | cd->req_varyopt; |
| 3170 |
} |
} |
| 3171 |
|
|
| 3172 |
/* If there was a previous first byte, leave it alone, and don't change |
/* If there was a previous first byte, leave it alone, and don't change |
| 3176 |
else |
else |
| 3177 |
{ |
{ |
| 3178 |
zerofirstbyte = firstbyte; |
zerofirstbyte = firstbyte; |
| 3179 |
zeroreqbyte = t[-1] | req_caseopt; |
zeroreqbyte = t[-1] | req_caseopt | cd->req_varyopt; |
| 3180 |
} |
} |
| 3181 |
} |
} |
| 3182 |
|
|
| 3183 |
/* In all cases (we know length > 1), the new required byte is the last |
/* In all cases (we know length > 1), the new required byte is the last |
| 3184 |
byte of the string. */ |
byte of the string. */ |
| 3185 |
|
|
| 3186 |
reqbyte = code[-1] | req_caseopt; |
reqbyte = code[-1] | req_caseopt | cd->req_varyopt; |
| 3187 |
} |
} |
| 3188 |
|
|
| 3189 |
else /* End of UTF-8 coding */ |
else /* End of UTF-8 coding */ |
| 3206 |
else |
else |
| 3207 |
{ |
{ |
| 3208 |
zerofirstbyte = firstbyte = previous[2] | req_caseopt; |
zerofirstbyte = firstbyte = previous[2] | req_caseopt; |
| 3209 |
zeroreqbyte = (length > 2)? (code[-2] | req_caseopt) : reqbyte; |
zeroreqbyte = (length > 2)? |
| 3210 |
reqbyte = code[-1] | req_caseopt; |
(code[-2] | req_caseopt | cd->req_varyopt) : reqbyte; |
| 3211 |
|
reqbyte = code[-1] | req_caseopt | cd->req_varyopt; |
| 3212 |
} |
} |
| 3213 |
} |
} |
| 3214 |
|
|
| 3217 |
else |
else |
| 3218 |
{ |
{ |
| 3219 |
zerofirstbyte = firstbyte; |
zerofirstbyte = firstbyte; |
| 3220 |
zeroreqbyte = (length == 1)? reqbyte : code[-2] | req_caseopt; |
zeroreqbyte = (length == 1)? reqbyte : |
| 3221 |
reqbyte = code[-1] | req_caseopt; |
code[-2] | req_caseopt | cd->req_varyopt; |
| 3222 |
|
reqbyte = code[-1] | req_caseopt | cd->req_varyopt; |
| 3223 |
} |
} |
| 3224 |
} |
} |
| 3225 |
|
|
| 3336 |
} |
} |
| 3337 |
|
|
| 3338 |
/* If this is not the first branch, the first char and reqbyte have to |
/* If this is not the first branch, the first char and reqbyte have to |
| 3339 |
match the values from all the previous branches. */ |
match the values from all the previous branches, except that if the previous |
| 3340 |
|
value for reqbyte didn't have REQ_VARY set, it can still match, and we set |
| 3341 |
|
REQ_VARY for the regex. */ |
| 3342 |
|
|
| 3343 |
else |
else |
| 3344 |
{ |
{ |
| 3360 |
|
|
| 3361 |
/* Now ensure that the reqbytes match */ |
/* Now ensure that the reqbytes match */ |
| 3362 |
|
|
| 3363 |
if (reqbyte != branchreqbyte) reqbyte = REQ_NONE; |
if ((reqbyte & ~REQ_VARY) != (branchreqbyte & ~REQ_VARY)) |
| 3364 |
|
reqbyte = REQ_NONE; |
| 3365 |
|
else reqbyte |= branchreqbyte; /* To "or" REQ_VARY */ |
| 3366 |
} |
} |
| 3367 |
|
|
| 3368 |
/* If lookbehind, check that this branch matches a fixed-length string, |
/* If lookbehind, check that this branch matches a fixed-length string, |
| 4200 |
ptr += 3; |
ptr += 3; |
| 4201 |
if (*ptr == '<') |
if (*ptr == '<') |
| 4202 |
{ |
{ |
| 4203 |
const uschar *p = ++ptr; |
const uschar *p; /* Don't amalgamate; some compilers */ |
| 4204 |
|
p = ++ptr; /* grumble at autoincrement in declaration */ |
| 4205 |
while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++; |
while ((compile_block.ctypes[*ptr] & ctype_word) != 0) ptr++; |
| 4206 |
if (*ptr != '>') |
if (*ptr != '>') |
| 4207 |
{ |
{ |
| 4632 |
compile_block.name_table = (uschar *)re + sizeof(real_pcre); |
compile_block.name_table = (uschar *)re + sizeof(real_pcre); |
| 4633 |
codestart = compile_block.name_table + re->name_entry_size * re->name_count; |
codestart = compile_block.name_table + re->name_entry_size * re->name_count; |
| 4634 |
compile_block.start_code = codestart; |
compile_block.start_code = codestart; |
| 4635 |
|
compile_block.req_varyopt = 0; |
| 4636 |
|
|
| 4637 |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
| 4638 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
| 4706 |
} |
} |
| 4707 |
} |
} |
| 4708 |
|
|
| 4709 |
/* Save the last required character if any. Remove caseless flag for |
/* For an anchored pattern, we use the "required byte" only if it follows a |
| 4710 |
non-caseable chars. */ |
variable length item in the regex. Remove the caseless flag for non-caseable |
| 4711 |
|
chars. */ |
| 4712 |
|
|
| 4713 |
if ((re->options & PCRE_ANCHORED) != 0 && reqbyte < 0 && firstbyte >= 0) |
if (reqbyte >= 0 && |
| 4714 |
reqbyte = firstbyte; |
((re->options & PCRE_ANCHORED) == 0 || (reqbyte & REQ_VARY) != 0)) |
|
|
|
|
if (reqbyte >= 0) |
|
| 4715 |
{ |
{ |
| 4716 |
int ch = reqbyte & 255; |
int ch = reqbyte & 255; |
| 4717 |
re->req_byte = ((reqbyte & REQ_CASELESS) != 0 && |
re->req_byte = ((reqbyte & REQ_CASELESS) != 0 && |
| 5296 |
(pcre_free)(new_recursive.offset_save); |
(pcre_free)(new_recursive.offset_save); |
| 5297 |
return MATCH_NOMATCH; |
return MATCH_NOMATCH; |
| 5298 |
} |
} |
| 5299 |
break; |
/* Control never reaches here */ |
| 5300 |
|
|
| 5301 |
/* "Once" brackets are like assertion brackets except that after a match, |
/* "Once" brackets are like assertion brackets except that after a match, |
| 5302 |
the point in the subject string is not moved back. Thus there can never be |
the point in the subject string is not moved back. Thus there can never be |
| 7403 |
optimization can save a huge amount of backtracking in patterns with nested |
optimization can save a huge amount of backtracking in patterns with nested |
| 7404 |
unlimited repeats that aren't going to match. Writing separate code for |
unlimited repeats that aren't going to match. Writing separate code for |
| 7405 |
cased/caseless versions makes it go faster, as does using an autoincrement |
cased/caseless versions makes it go faster, as does using an autoincrement |
| 7406 |
and backing off on a match. */ |
and backing off on a match. |
| 7407 |
|
|
| 7408 |
|
HOWEVER: when the subject string is very, very long, searching to its end can |
| 7409 |
|
take a long time, and give bad performance on quite ordinary patterns. This |
| 7410 |
|
showed up when somebody was matching /^C/ on a 32-megabyte string... so we |
| 7411 |
|
don't do this when the string is sufficiently long. */ |
| 7412 |
|
|
| 7413 |
if (req_byte >= 0) |
if (req_byte >= 0 && end_subject - start_match < REQ_BYTE_MAX) |
| 7414 |
{ |
{ |
| 7415 |
register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0); |
register const uschar *p = start_match + ((first_byte >= 0)? 1 : 0); |
| 7416 |
|
|