| 66 |
rather than bytes. |
rather than bytes. |
| 67 |
|
|
| 68 |
Arguments: |
Arguments: |
| 69 |
code pointer to start of group (the bracket) |
code pointer to start of group (the bracket) |
| 70 |
startcode pointer to start of the whole pattern |
startcode pointer to start of the whole pattern |
| 71 |
options the compiling options |
options the compiling options |
| 72 |
|
had_accept pointer to flag for (*ACCEPT) encountered |
| 73 |
|
int RECURSE depth |
| 74 |
|
|
| 75 |
Returns: the minimum length |
Returns: the minimum length |
| 76 |
-1 if \C was encountered |
-1 if \C was encountered |
| 79 |
*/ |
*/ |
| 80 |
|
|
| 81 |
static int |
static int |
| 82 |
find_minlength(const uschar *code, const uschar *startcode, int options) |
find_minlength(const uschar *code, const uschar *startcode, int options, |
| 83 |
|
BOOL *had_accept_ptr, int recurse_depth) |
| 84 |
{ |
{ |
| 85 |
int length = -1; |
int length = -1; |
| 86 |
BOOL utf8 = (options & PCRE_UTF8) != 0; |
BOOL utf8 = (options & PCRE_UTF8) != 0; |
| 128 |
case OP_BRAPOS: |
case OP_BRAPOS: |
| 129 |
case OP_SBRAPOS: |
case OP_SBRAPOS: |
| 130 |
case OP_ONCE: |
case OP_ONCE: |
| 131 |
d = find_minlength(cc, startcode, options); |
d = find_minlength(cc, startcode, options, had_accept_ptr, recurse_depth); |
| 132 |
if (d < 0) return d; |
if (d < 0) return d; |
| 133 |
branchlength += d; |
branchlength += d; |
| 134 |
|
if (*had_accept_ptr) return branchlength; |
| 135 |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
| 136 |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| 137 |
break; |
break; |
| 138 |
|
|
| 139 |
/* Reached end of a branch; if it's a ket it is the end of a nested |
/* Reached end of a branch; if it's a ket it is the end of a nested |
| 140 |
call. If it's ALT it is an alternation in a nested call. If it is |
call. If it's ALT it is an alternation in a nested call. If it is END it's |
| 141 |
END it's the end of the outer call. All can be handled by the same code. */ |
the end of the outer call. All can be handled by the same code. If it is |
| 142 |
|
ACCEPT, it is essentially the same as END, but we set a flag so that |
| 143 |
|
counting stops. */ |
| 144 |
|
|
| 145 |
|
case OP_ACCEPT: |
| 146 |
|
case OP_ASSERT_ACCEPT: |
| 147 |
|
*had_accept_ptr = TRUE; |
| 148 |
|
/* Fall through */ |
| 149 |
case OP_ALT: |
case OP_ALT: |
| 150 |
case OP_KET: |
case OP_KET: |
| 151 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 154 |
case OP_END: |
case OP_END: |
| 155 |
if (length < 0 || (!had_recurse && branchlength < length)) |
if (length < 0 || (!had_recurse && branchlength < length)) |
| 156 |
length = branchlength; |
length = branchlength; |
| 157 |
if (*cc != OP_ALT) return length; |
if (op != OP_ALT) return length; |
| 158 |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| 159 |
branchlength = 0; |
branchlength = 0; |
| 160 |
had_recurse = FALSE; |
had_recurse = FALSE; |
| 277 |
cc++; |
cc++; |
| 278 |
break; |
break; |
| 279 |
|
|
| 280 |
/* "Any newline" might match two characters */ |
/* "Any newline" might match two characters, but it also might match just |
| 281 |
|
one. */ |
| 282 |
|
|
| 283 |
case OP_ANYNL: |
case OP_ANYNL: |
| 284 |
branchlength += 2; |
branchlength += 1; |
| 285 |
cc++; |
cc++; |
| 286 |
break; |
break; |
| 287 |
|
|
| 377 |
d = 0; |
d = 0; |
| 378 |
had_recurse = TRUE; |
had_recurse = TRUE; |
| 379 |
} |
} |
| 380 |
else d = find_minlength(cs, startcode, options); |
else |
| 381 |
|
{ |
| 382 |
|
d = find_minlength(cs, startcode, options, had_accept_ptr, |
| 383 |
|
recurse_depth); |
| 384 |
|
*had_accept_ptr = FALSE; |
| 385 |
|
} |
| 386 |
} |
} |
| 387 |
else d = 0; |
else d = 0; |
| 388 |
cc += 3; |
cc += 3; |
| 419 |
branchlength += min * d; |
branchlength += min * d; |
| 420 |
break; |
break; |
| 421 |
|
|
| 422 |
|
/* We can easily detect direct recursion, but not mutual recursion. This is |
| 423 |
|
caught by a recursion depth count. */ |
| 424 |
|
|
| 425 |
case OP_RECURSE: |
case OP_RECURSE: |
| 426 |
cs = ce = (uschar *)startcode + GET(cc, 1); |
cs = ce = (uschar *)startcode + GET(cc, 1); |
| 427 |
if (cs == NULL) return -2; |
if (cs == NULL) return -2; |
| 428 |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
| 429 |
if (cc > cs && cc < ce) |
if ((cc > cs && cc < ce) || recurse_depth > 10) |
| 430 |
had_recurse = TRUE; |
had_recurse = TRUE; |
| 431 |
else |
else |
| 432 |
branchlength += find_minlength(cs, startcode, options); |
{ |
| 433 |
|
branchlength += find_minlength(cs, startcode, options, had_accept_ptr, |
| 434 |
|
recurse_depth + 1); |
| 435 |
|
*had_accept_ptr = FALSE; |
| 436 |
|
} |
| 437 |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| 438 |
break; |
break; |
| 439 |
|
|
| 504 |
|
|
| 505 |
/* The remaining opcodes are just skipped over. */ |
/* The remaining opcodes are just skipped over. */ |
| 506 |
|
|
|
case OP_ACCEPT: |
|
| 507 |
case OP_CLOSE: |
case OP_CLOSE: |
| 508 |
case OP_COMMIT: |
case OP_COMMIT: |
| 509 |
case OP_FAIL: |
case OP_FAIL: |
| 709 |
while (try_next) /* Loop for items in this branch */ |
while (try_next) /* Loop for items in this branch */ |
| 710 |
{ |
{ |
| 711 |
int rc; |
int rc; |
| 712 |
|
|
| 713 |
switch(*tcode) |
switch(*tcode) |
| 714 |
{ |
{ |
| 715 |
/* If we reach something we don't understand, it means a new opcode has |
/* If we reach something we don't understand, it means a new opcode has |
| 722 |
/* Fail for a valid opcode that implies no starting bits. */ |
/* Fail for a valid opcode that implies no starting bits. */ |
| 723 |
|
|
| 724 |
case OP_ACCEPT: |
case OP_ACCEPT: |
| 725 |
|
case OP_ASSERT_ACCEPT: |
| 726 |
case OP_ALLANY: |
case OP_ALLANY: |
| 727 |
case OP_ANY: |
case OP_ANY: |
| 728 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 729 |
case OP_CIRC: |
case OP_CIRC: |
| 730 |
case OP_CIRCM: |
case OP_CIRCM: |
| 731 |
case OP_CLOSE: |
case OP_CLOSE: |
| 732 |
case OP_COMMIT: |
case OP_COMMIT: |
| 733 |
case OP_COND: |
case OP_COND: |
| 734 |
case OP_CREF: |
case OP_CREF: |
| 735 |
case OP_DEF: |
case OP_DEF: |
| 736 |
case OP_DOLL: |
case OP_DOLL: |
| 737 |
case OP_DOLLM: |
case OP_DOLLM: |
| 738 |
case OP_END: |
case OP_END: |
| 739 |
case OP_EOD: |
case OP_EOD: |
| 740 |
case OP_EODN: |
case OP_EODN: |
| 741 |
case OP_EXTUNI: |
case OP_EXTUNI: |
| 742 |
case OP_FAIL: |
case OP_FAIL: |
| 743 |
case OP_MARK: |
case OP_MARK: |
| 745 |
case OP_NOT: |
case OP_NOT: |
| 746 |
case OP_NOTEXACT: |
case OP_NOTEXACT: |
| 747 |
case OP_NOTEXACTI: |
case OP_NOTEXACTI: |
| 748 |
case OP_NOTI: |
case OP_NOTI: |
| 749 |
case OP_NOTMINPLUS: |
case OP_NOTMINPLUS: |
| 750 |
case OP_NOTMINPLUSI: |
case OP_NOTMINPLUSI: |
| 751 |
case OP_NOTMINQUERY: |
case OP_NOTMINQUERY: |
| 773 |
case OP_NOTUPTOI: |
case OP_NOTUPTOI: |
| 774 |
case OP_NOT_HSPACE: |
case OP_NOT_HSPACE: |
| 775 |
case OP_NOT_VSPACE: |
case OP_NOT_VSPACE: |
|
case OP_NOT_WORD_BOUNDARY: |
|
| 776 |
case OP_NRREF: |
case OP_NRREF: |
| 777 |
case OP_PROP: |
case OP_PROP: |
| 778 |
case OP_PRUNE: |
case OP_PRUNE: |
| 782 |
case OP_REFI: |
case OP_REFI: |
| 783 |
case OP_REVERSE: |
case OP_REVERSE: |
| 784 |
case OP_RREF: |
case OP_RREF: |
| 785 |
case OP_SCOND: |
case OP_SCOND: |
| 786 |
case OP_SET_SOM: |
case OP_SET_SOM: |
| 787 |
case OP_SKIP: |
case OP_SKIP: |
| 788 |
case OP_SKIP_ARG: |
case OP_SKIP_ARG: |
| 790 |
case OP_SOM: |
case OP_SOM: |
| 791 |
case OP_THEN: |
case OP_THEN: |
| 792 |
case OP_THEN_ARG: |
case OP_THEN_ARG: |
|
case OP_WORD_BOUNDARY: |
|
| 793 |
case OP_XCLASS: |
case OP_XCLASS: |
| 794 |
return SSB_FAIL; |
return SSB_FAIL; |
| 795 |
|
|
| 796 |
|
/* We can ignore word boundary tests. */ |
| 797 |
|
|
| 798 |
|
case OP_WORD_BOUNDARY: |
| 799 |
|
case OP_NOT_WORD_BOUNDARY: |
| 800 |
|
tcode++; |
| 801 |
|
break; |
| 802 |
|
|
| 803 |
/* If we hit a bracket or a positive lookahead assertion, recurse to set |
/* If we hit a bracket or a positive lookahead assertion, recurse to set |
| 804 |
bits from within the subpattern. If it can't find anything, we have to |
bits from within the subpattern. If it can't find anything, we have to |
| 805 |
give up. If it finds some mandatory character(s), we are done for this |
give up. If it finds some mandatory character(s), we are done for this |
| 1165 |
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; |
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; |
| 1166 |
} |
} |
| 1167 |
|
|
| 1168 |
/* Advance past the bit map, and act on what follows. For a zero |
/* Advance past the bit map, and act on what follows. For a zero |
| 1169 |
minimum repeat, continue; otherwise stop processing. */ |
minimum repeat, continue; otherwise stop processing. */ |
| 1170 |
|
|
| 1171 |
tcode += 32; |
tcode += 32; |
| 1183 |
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; |
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; |
| 1184 |
else try_next = FALSE; |
else try_next = FALSE; |
| 1185 |
break; |
break; |
| 1186 |
|
|
| 1187 |
default: |
default: |
| 1188 |
try_next = FALSE; |
try_next = FALSE; |
| 1189 |
break; |
break; |
| 1228 |
{ |
{ |
| 1229 |
int min; |
int min; |
| 1230 |
BOOL bits_set = FALSE; |
BOOL bits_set = FALSE; |
| 1231 |
|
BOOL had_accept = FALSE; |
| 1232 |
uschar start_bits[32]; |
uschar start_bits[32]; |
| 1233 |
pcre_extra *extra; |
pcre_extra *extra; |
| 1234 |
pcre_study_data *study; |
pcre_study_data *study; |
| 1286 |
|
|
| 1287 |
/* Find the minimum length of subject string. */ |
/* Find the minimum length of subject string. */ |
| 1288 |
|
|
| 1289 |
switch(min = find_minlength(code, code, re->options)) |
switch(min = find_minlength(code, code, re->options, &had_accept, 0)) |
| 1290 |
{ |
{ |
| 1291 |
case -2: *errorptr = "internal error: missing capturing bracket"; break; |
case -2: *errorptr = "internal error: missing capturing bracket"; break; |
| 1292 |
case -3: *errorptr = "internal error: opcode not recognized"; break; |
case -3: *errorptr = "internal error: opcode not recognized"; break; |
| 1293 |
default: break; |
default: break; |
| 1294 |
} |
} |
| 1295 |
|
|
| 1296 |
/* Return NULL if there's been an error or if no optimization is possible. */ |
/* Return NULL if there's been an (internal) error or if no optimization is |
| 1297 |
|
possible. A FALSE setting for bits_set is common when there are no obvious |
| 1298 |
|
starting bytes. However a negative value of min occurs only when the pattern |
| 1299 |
|
contains \C, in other words, it's an exceptional case nowadays. */ |
| 1300 |
|
|
| 1301 |
if (*errorptr != NULL || (!bits_set && min < 0)) return NULL; |
if (*errorptr != NULL || (!bits_set && min < 0)) return NULL; |
| 1302 |
|
|
| 1335 |
study->minlength = min; |
study->minlength = min; |
| 1336 |
} |
} |
| 1337 |
|
|
| 1338 |
|
/* If JIT support was compiled and requested, attempt the JIT compilation. */ |
| 1339 |
|
|
| 1340 |
|
extra->executable_jit = NULL; |
| 1341 |
|
#ifdef SUPPORT_JIT |
| 1342 |
|
if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra); |
| 1343 |
|
#endif |
| 1344 |
|
|
| 1345 |
return extra; |
return extra; |
| 1346 |
} |
} |
| 1347 |
|
|
| 1348 |
|
|
| 1349 |
|
/************************************************* |
| 1350 |
|
* Free the study data * |
| 1351 |
|
*************************************************/ |
| 1352 |
|
|
| 1353 |
|
/* This function frees the memory that was obtained by pcre_study(). |
| 1354 |
|
|
| 1355 |
|
Argument: a pointer to the pcre_extra block |
| 1356 |
|
Returns: nothing |
| 1357 |
|
*/ |
| 1358 |
|
|
| 1359 |
|
PCRE_EXP_DEFN void |
| 1360 |
|
pcre_free_study(pcre_extra *extra) |
| 1361 |
|
{ |
| 1362 |
|
#ifdef SUPPORT_JIT |
| 1363 |
|
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && |
| 1364 |
|
extra->executable_jit != NULL) |
| 1365 |
|
_pcre_jit_free(extra->executable_jit); |
| 1366 |
|
#endif |
| 1367 |
|
pcre_free(extra); |
| 1368 |
|
} |
| 1369 |
|
|
| 1370 |
/* End of pcre_study.c */ |
/* End of pcre_study.c */ |