| 66 |
rather than bytes. |
rather than bytes. |
| 67 |
|
|
| 68 |
Arguments: |
Arguments: |
| 69 |
code pointer to start of group (the bracket) |
code pointer to start of group (the bracket) |
| 70 |
startcode pointer to start of the whole pattern |
startcode pointer to start of the whole pattern |
| 71 |
options the compiling options |
options the compiling options |
| 72 |
had_accept pointer to flag for (*ACCEPT) encountered |
recurse_depth RECURSE depth |
|
recurse_depth RECURSE depth |
|
| 73 |
|
|
| 74 |
Returns: the minimum length |
Returns: the minimum length |
| 75 |
-1 if \C was encountered |
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered |
| 76 |
-2 internal error (missing capturing bracket) |
-2 internal error (missing capturing bracket) |
| 77 |
-3 internal error (opcode not listed) |
-3 internal error (opcode not listed) |
| 78 |
*/ |
*/ |
| 79 |
|
|
| 80 |
static int |
static int |
| 81 |
find_minlength(const uschar *code, const uschar *startcode, int options, |
find_minlength(const uschar *code, const uschar *startcode, int options, |
| 82 |
BOOL *had_accept_ptr, int recurse_depth) |
int recurse_depth) |
| 83 |
{ |
{ |
| 84 |
int length = -1; |
int length = -1; |
| 85 |
BOOL utf8 = (options & PCRE_UTF8) != 0; |
BOOL utf8 = (options & PCRE_UTF8) != 0; |
| 127 |
case OP_BRAPOS: |
case OP_BRAPOS: |
| 128 |
case OP_SBRAPOS: |
case OP_SBRAPOS: |
| 129 |
case OP_ONCE: |
case OP_ONCE: |
| 130 |
d = find_minlength(cc, startcode, options, had_accept_ptr, recurse_depth); |
case OP_ONCE_NC: |
| 131 |
|
d = find_minlength(cc, startcode, options, recurse_depth); |
| 132 |
if (d < 0) return d; |
if (d < 0) return d; |
| 133 |
branchlength += d; |
branchlength += d; |
|
if (*had_accept_ptr) return branchlength; |
|
| 134 |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
| 135 |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| 136 |
break; |
break; |
| 137 |
|
|
| 138 |
/* Reached end of a branch; if it's a ket it is the end of a nested |
/* ACCEPT makes things far too complicated; we have to give up. */ |
|
call. If it's ALT it is an alternation in a nested call. If it is END it's |
|
|
the end of the outer call. All can be handled by the same code. If it is |
|
|
ACCEPT, it is essentially the same as END, but we set a flag so that |
|
|
counting stops. */ |
|
| 139 |
|
|
| 140 |
case OP_ACCEPT: |
case OP_ACCEPT: |
| 141 |
case OP_ASSERT_ACCEPT: |
case OP_ASSERT_ACCEPT: |
| 142 |
*had_accept_ptr = TRUE; |
return -1; |
| 143 |
/* Fall through */ |
|
| 144 |
|
/* Reached end of a branch; if it's a ket it is the end of a nested |
| 145 |
|
call. If it's ALT it is an alternation in a nested call. If it is END it's |
| 146 |
|
the end of the outer call. All can be handled by the same code. If an |
| 147 |
|
ACCEPT was previously encountered, use the length that was in force at that |
| 148 |
|
time, and pass back the shortest ACCEPT length. */ |
| 149 |
|
|
| 150 |
case OP_ALT: |
case OP_ALT: |
| 151 |
case OP_KET: |
case OP_KET: |
| 152 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 286 |
cc++; |
cc++; |
| 287 |
break; |
break; |
| 288 |
|
|
| 289 |
/* The single-byte matcher means we can't proceed in UTF-8 mode */ |
/* The single-byte matcher means we can't proceed in UTF-8 mode. (In |
| 290 |
|
non-UTF-8 mode \C will actually be turned into OP_ALLANY, so won't ever |
| 291 |
|
appear, but leave the code, just in case.) */ |
| 292 |
|
|
| 293 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 294 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 382 |
} |
} |
| 383 |
else |
else |
| 384 |
{ |
{ |
| 385 |
d = find_minlength(cs, startcode, options, had_accept_ptr, |
d = find_minlength(cs, startcode, options, recurse_depth); |
|
recurse_depth); |
|
|
*had_accept_ptr = FALSE; |
|
| 386 |
} |
} |
| 387 |
} |
} |
| 388 |
else d = 0; |
else d = 0; |
| 425 |
|
|
| 426 |
case OP_RECURSE: |
case OP_RECURSE: |
| 427 |
cs = ce = (uschar *)startcode + GET(cc, 1); |
cs = ce = (uschar *)startcode + GET(cc, 1); |
|
if (cs == NULL) return -2; |
|
| 428 |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
| 429 |
if ((cc > cs && cc < ce) || recurse_depth > 10) |
if ((cc > cs && cc < ce) || recurse_depth > 10) |
| 430 |
had_recurse = TRUE; |
had_recurse = TRUE; |
| 431 |
else |
else |
| 432 |
{ |
{ |
| 433 |
branchlength += find_minlength(cs, startcode, options, had_accept_ptr, |
branchlength += find_minlength(cs, startcode, options, recurse_depth + 1); |
|
recurse_depth + 1); |
|
|
*had_accept_ptr = FALSE; |
|
| 434 |
} |
} |
| 435 |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| 436 |
break; |
break; |
| 493 |
case OP_MARK: |
case OP_MARK: |
| 494 |
case OP_PRUNE_ARG: |
case OP_PRUNE_ARG: |
| 495 |
case OP_SKIP_ARG: |
case OP_SKIP_ARG: |
|
cc += _pcre_OP_lengths[op] + cc[1]; |
|
|
break; |
|
|
|
|
| 496 |
case OP_THEN_ARG: |
case OP_THEN_ARG: |
| 497 |
cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE]; |
cc += _pcre_OP_lengths[op] + cc[1]; |
| 498 |
break; |
break; |
| 499 |
|
|
| 500 |
/* The remaining opcodes are just skipped over. */ |
/* The remaining opcodes are just skipped over. */ |
| 809 |
case OP_CBRAPOS: |
case OP_CBRAPOS: |
| 810 |
case OP_SCBRAPOS: |
case OP_SCBRAPOS: |
| 811 |
case OP_ONCE: |
case OP_ONCE: |
| 812 |
|
case OP_ONCE_NC: |
| 813 |
case OP_ASSERT: |
case OP_ASSERT: |
| 814 |
rc = set_start_bits(tcode, start_bits, utf8, cd); |
rc = set_start_bits(tcode, start_bits, utf8, cd); |
| 815 |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
| 1224 |
{ |
{ |
| 1225 |
int min; |
int min; |
| 1226 |
BOOL bits_set = FALSE; |
BOOL bits_set = FALSE; |
|
BOOL had_accept = FALSE; |
|
| 1227 |
uschar start_bits[32]; |
uschar start_bits[32]; |
| 1228 |
pcre_extra *extra = NULL; |
pcre_extra *extra = NULL; |
| 1229 |
pcre_study_data *study; |
pcre_study_data *study; |
| 1276 |
rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, |
rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, |
| 1277 |
&compile_block); |
&compile_block); |
| 1278 |
bits_set = rc == SSB_DONE; |
bits_set = rc == SSB_DONE; |
| 1279 |
if (rc == SSB_UNKNOWN) |
if (rc == SSB_UNKNOWN) |
| 1280 |
{ |
{ |
| 1281 |
*errorptr = "internal error: opcode not recognized"; |
*errorptr = "internal error: opcode not recognized"; |
| 1282 |
return NULL; |
return NULL; |
| 1283 |
} |
} |
| 1284 |
} |
} |
| 1285 |
|
|
| 1286 |
/* Find the minimum length of subject string. */ |
/* Find the minimum length of subject string. */ |
| 1287 |
|
|
| 1288 |
switch(min = find_minlength(code, code, re->options, &had_accept, 0)) |
switch(min = find_minlength(code, code, re->options, 0)) |
| 1289 |
{ |
{ |
| 1290 |
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; |
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; |
| 1291 |
case -3: *errorptr = "internal error: opcode not recognized"; return NULL; |
case -3: *errorptr = "internal error: opcode not recognized"; return NULL; |
| 1301 |
so that if it becomes variable in the future, we don't have to change that |
so that if it becomes variable in the future, we don't have to change that |
| 1302 |
code. */ |
code. */ |
| 1303 |
|
|
| 1304 |
if (bits_set || min > 0 |
if (bits_set || min > 0 |
| 1305 |
#ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
| 1306 |
|| (options & PCRE_STUDY_JIT_COMPILE) != 0 |
|| (options & PCRE_STUDY_JIT_COMPILE) != 0 |
| 1307 |
#endif |
#endif |
| 1314 |
*errorptr = "failed to get memory"; |
*errorptr = "failed to get memory"; |
| 1315 |
return NULL; |
return NULL; |
| 1316 |
} |
} |
| 1317 |
|
|
| 1318 |
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra)); |
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra)); |
| 1319 |
extra->flags = PCRE_EXTRA_STUDY_DATA; |
extra->flags = PCRE_EXTRA_STUDY_DATA; |
| 1320 |
extra->study_data = study; |
extra->study_data = study; |
| 1321 |
|
|
| 1322 |
study->size = sizeof(pcre_study_data); |
study->size = sizeof(pcre_study_data); |
| 1323 |
study->flags = 0; |
study->flags = 0; |
| 1324 |
|
|
| 1325 |
if (bits_set) |
/* Set the start bits always, to avoid unset memory errors if the |
| 1326 |
|
study data is written to a file, but set the flag only if any of the bits |
| 1327 |
|
are set, to save time looking when none are. */ |
| 1328 |
|
|
| 1329 |
|
if (bits_set) |
| 1330 |
{ |
{ |
| 1331 |
study->flags |= PCRE_STUDY_MAPPED; |
study->flags |= PCRE_STUDY_MAPPED; |
| 1332 |
memcpy(study->start_bits, start_bits, sizeof(start_bits)); |
memcpy(study->start_bits, start_bits, sizeof(start_bits)); |
| 1333 |
} |
} |
| 1334 |
|
else memset(study->start_bits, 0, 32 * sizeof(uschar)); |
| 1335 |
|
|
| 1336 |
|
/* Always set the minlength value in the block, because the JIT compiler |
| 1337 |
|
makes use of it. However, don't set the bit unless the length is greater than |
| 1338 |
|
zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time |
| 1339 |
|
checking the zero case. */ |
| 1340 |
|
|
| 1341 |
if (min > 0) |
if (min > 0) |
| 1342 |
{ |
{ |
| 1343 |
study->flags |= PCRE_STUDY_MINLEN; |
study->flags |= PCRE_STUDY_MINLEN; |
| 1344 |
study->minlength = min; |
study->minlength = min; |
| 1345 |
} |
} |
| 1346 |
|
else study->minlength = 0; |
| 1347 |
|
|
| 1348 |
/* If JIT support was compiled and requested, attempt the JIT compilation. |
/* If JIT support was compiled and requested, attempt the JIT compilation. |
| 1349 |
If no starting bytes were found, and the minimum length is zero, and JIT |
If no starting bytes were found, and the minimum length is zero, and JIT |
| 1350 |
compilation fails, no flags will be set, so abandon the extra block and |
compilation fails, abandon the extra block and return NULL. */ |
| 1351 |
return NULL. */ |
|
|
|
|
| 1352 |
#ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
| 1353 |
extra->executable_jit = NULL; |
extra->executable_jit = NULL; |
| 1354 |
if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra); |
if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra); |
| 1355 |
if (study->flags == 0) |
if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) |
| 1356 |
{ |
{ |
| 1357 |
pcre_free_study(extra); |
pcre_free_study(extra); |
| 1358 |
extra = NULL; |
extra = NULL; |
| 1359 |
} |
} |
| 1360 |
#endif |
#endif |
| 1361 |
} |
} |
| 1362 |
|
|
| 1378 |
pcre_free_study(pcre_extra *extra) |
pcre_free_study(pcre_extra *extra) |
| 1379 |
{ |
{ |
| 1380 |
#ifdef SUPPORT_JIT |
#ifdef SUPPORT_JIT |
| 1381 |
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && |
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && |
| 1382 |
extra->executable_jit != NULL) |
extra->executable_jit != NULL) |
| 1383 |
_pcre_jit_free(extra->executable_jit); |
_pcre_jit_free(extra->executable_jit); |
| 1384 |
#endif |
#endif |