| 66 |
rather than bytes. |
rather than bytes. |
| 67 |
|
|
| 68 |
Arguments: |
Arguments: |
| 69 |
code pointer to start of group (the bracket) |
code pointer to start of group (the bracket) |
| 70 |
startcode pointer to start of the whole pattern |
startcode pointer to start of the whole pattern |
| 71 |
options the compiling options |
options the compiling options |
| 72 |
had_accept pointer to flag for (*ACCEPT) encountered |
int RECURSE depth |
|
int RECURSE depth |
|
| 73 |
|
|
| 74 |
Returns: the minimum length |
Returns: the minimum length |
| 75 |
-1 if \C was encountered |
-1 if \C in UTF-8 mode or (*ACCEPT) was encountered |
| 76 |
-2 internal error (missing capturing bracket) |
-2 internal error (missing capturing bracket) |
| 77 |
-3 internal error (opcode not listed) |
-3 internal error (opcode not listed) |
| 78 |
*/ |
*/ |
| 79 |
|
|
| 80 |
static int |
static int |
| 81 |
find_minlength(const uschar *code, const uschar *startcode, int options, |
find_minlength(const uschar *code, const uschar *startcode, int options, |
| 82 |
BOOL *had_accept_ptr, int recurse_depth) |
int recurse_depth) |
| 83 |
{ |
{ |
| 84 |
int length = -1; |
int length = -1; |
| 85 |
BOOL utf8 = (options & PCRE_UTF8) != 0; |
BOOL utf8 = (options & PCRE_UTF8) != 0; |
| 127 |
case OP_BRAPOS: |
case OP_BRAPOS: |
| 128 |
case OP_SBRAPOS: |
case OP_SBRAPOS: |
| 129 |
case OP_ONCE: |
case OP_ONCE: |
| 130 |
d = find_minlength(cc, startcode, options, had_accept_ptr, recurse_depth); |
case OP_ONCE_NC: |
| 131 |
|
d = find_minlength(cc, startcode, options, recurse_depth); |
| 132 |
if (d < 0) return d; |
if (d < 0) return d; |
| 133 |
branchlength += d; |
branchlength += d; |
|
if (*had_accept_ptr) return branchlength; |
|
| 134 |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
| 135 |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| 136 |
break; |
break; |
| 137 |
|
|
| 138 |
/* Reached end of a branch; if it's a ket it is the end of a nested |
/* ACCEPT makes things far too complicated; we have to give up. */ |
|
call. If it's ALT it is an alternation in a nested call. If it is END it's |
|
|
the end of the outer call. All can be handled by the same code. If it is |
|
|
ACCEPT, it is essentially the same as END, but we set a flag so that |
|
|
counting stops. */ |
|
| 139 |
|
|
| 140 |
case OP_ACCEPT: |
case OP_ACCEPT: |
| 141 |
case OP_ASSERT_ACCEPT: |
case OP_ASSERT_ACCEPT: |
| 142 |
*had_accept_ptr = TRUE; |
return -1; |
| 143 |
/* Fall through */ |
|
| 144 |
|
/* Reached end of a branch; if it's a ket it is the end of a nested |
| 145 |
|
call. If it's ALT it is an alternation in a nested call. If it is END it's |
| 146 |
|
the end of the outer call. All can be handled by the same code. If an |
| 147 |
|
ACCEPT was previously encountered, use the length that was in force at that |
| 148 |
|
time, and pass back the shortest ACCEPT length. */ |
| 149 |
|
|
| 150 |
case OP_ALT: |
case OP_ALT: |
| 151 |
case OP_KET: |
case OP_KET: |
| 152 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 286 |
cc++; |
cc++; |
| 287 |
break; |
break; |
| 288 |
|
|
| 289 |
/* The single-byte matcher means we can't proceed in UTF-8 mode */ |
/* The single-byte matcher means we can't proceed in UTF-8 mode. (In |
| 290 |
|
non-UTF-8 mode \C will actually be turned into OP_ALLANY, so won't ever |
| 291 |
|
appear, but leave the code, just in case.) */ |
| 292 |
|
|
| 293 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 294 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 382 |
} |
} |
| 383 |
else |
else |
| 384 |
{ |
{ |
| 385 |
d = find_minlength(cs, startcode, options, had_accept_ptr, |
d = find_minlength(cs, startcode, options, recurse_depth); |
|
recurse_depth); |
|
|
*had_accept_ptr = FALSE; |
|
| 386 |
} |
} |
| 387 |
} |
} |
| 388 |
else d = 0; |
else d = 0; |
| 425 |
|
|
| 426 |
case OP_RECURSE: |
case OP_RECURSE: |
| 427 |
cs = ce = (uschar *)startcode + GET(cc, 1); |
cs = ce = (uschar *)startcode + GET(cc, 1); |
|
if (cs == NULL) return -2; |
|
| 428 |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
| 429 |
if ((cc > cs && cc < ce) || recurse_depth > 10) |
if ((cc > cs && cc < ce) || recurse_depth > 10) |
| 430 |
had_recurse = TRUE; |
had_recurse = TRUE; |
| 431 |
else |
else |
| 432 |
{ |
{ |
| 433 |
branchlength += find_minlength(cs, startcode, options, had_accept_ptr, |
branchlength += find_minlength(cs, startcode, options, recurse_depth + 1); |
|
recurse_depth + 1); |
|
|
*had_accept_ptr = FALSE; |
|
| 434 |
} |
} |
| 435 |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| 436 |
break; |
break; |
| 493 |
case OP_MARK: |
case OP_MARK: |
| 494 |
case OP_PRUNE_ARG: |
case OP_PRUNE_ARG: |
| 495 |
case OP_SKIP_ARG: |
case OP_SKIP_ARG: |
|
cc += _pcre_OP_lengths[op] + cc[1]; |
|
|
break; |
|
|
|
|
| 496 |
case OP_THEN_ARG: |
case OP_THEN_ARG: |
| 497 |
cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE]; |
cc += _pcre_OP_lengths[op] + cc[1]; |
| 498 |
break; |
break; |
| 499 |
|
|
| 500 |
/* The remaining opcodes are just skipped over. */ |
/* The remaining opcodes are just skipped over. */ |
| 789 |
return SSB_FAIL; |
return SSB_FAIL; |
| 790 |
|
|
| 791 |
/* We can ignore word boundary tests. */ |
/* We can ignore word boundary tests. */ |
| 792 |
|
|
| 793 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
| 794 |
case OP_NOT_WORD_BOUNDARY: |
case OP_NOT_WORD_BOUNDARY: |
| 795 |
tcode++; |
tcode++; |
| 796 |
break; |
break; |
| 797 |
|
|
| 798 |
/* If we hit a bracket or a positive lookahead assertion, recurse to set |
/* If we hit a bracket or a positive lookahead assertion, recurse to set |
| 799 |
bits from within the subpattern. If it can't find anything, we have to |
bits from within the subpattern. If it can't find anything, we have to |
| 809 |
case OP_CBRAPOS: |
case OP_CBRAPOS: |
| 810 |
case OP_SCBRAPOS: |
case OP_SCBRAPOS: |
| 811 |
case OP_ONCE: |
case OP_ONCE: |
| 812 |
|
case OP_ONCE_NC: |
| 813 |
case OP_ASSERT: |
case OP_ASSERT: |
| 814 |
rc = set_start_bits(tcode, start_bits, utf8, cd); |
rc = set_start_bits(tcode, start_bits, utf8, cd); |
| 815 |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
| 1224 |
{ |
{ |
| 1225 |
int min; |
int min; |
| 1226 |
BOOL bits_set = FALSE; |
BOOL bits_set = FALSE; |
|
BOOL had_accept = FALSE; |
|
| 1227 |
uschar start_bits[32]; |
uschar start_bits[32]; |
| 1228 |
pcre_extra *extra; |
pcre_extra *extra = NULL; |
| 1229 |
pcre_study_data *study; |
pcre_study_data *study; |
| 1230 |
const uschar *tables; |
const uschar *tables; |
| 1231 |
uschar *code; |
uschar *code; |
| 1276 |
rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, |
rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, |
| 1277 |
&compile_block); |
&compile_block); |
| 1278 |
bits_set = rc == SSB_DONE; |
bits_set = rc == SSB_DONE; |
| 1279 |
if (rc == SSB_UNKNOWN) *errorptr = "internal error: opcode not recognized"; |
if (rc == SSB_UNKNOWN) |
| 1280 |
|
{ |
| 1281 |
|
*errorptr = "internal error: opcode not recognized"; |
| 1282 |
|
return NULL; |
| 1283 |
|
} |
| 1284 |
} |
} |
| 1285 |
|
|
| 1286 |
/* Find the minimum length of subject string. */ |
/* Find the minimum length of subject string. */ |
| 1287 |
|
|
| 1288 |
switch(min = find_minlength(code, code, re->options, &had_accept, 0)) |
switch(min = find_minlength(code, code, re->options, 0)) |
| 1289 |
{ |
{ |
| 1290 |
case -2: *errorptr = "internal error: missing capturing bracket"; break; |
case -2: *errorptr = "internal error: missing capturing bracket"; return NULL; |
| 1291 |
case -3: *errorptr = "internal error: opcode not recognized"; break; |
case -3: *errorptr = "internal error: opcode not recognized"; return NULL; |
| 1292 |
default: break; |
default: break; |
| 1293 |
} |
} |
| 1294 |
|
|
| 1295 |
/* Return NULL if there's been an error or if no optimization is possible. */ |
/* If a set of starting bytes has been identified, or if the minimum length is |
| 1296 |
|
greater than zero, or if JIT optimization has been requested, get a pcre_extra |
| 1297 |
|
block and a pcre_study_data block. The study data is put in the latter, which |
| 1298 |
|
is pointed to by the former, which may also get additional data set later by |
| 1299 |
|
the calling program. At the moment, the size of pcre_study_data is fixed. We |
| 1300 |
|
nevertheless save it in a field for returning via the pcre_fullinfo() function |
| 1301 |
|
so that if it becomes variable in the future, we don't have to change that |
| 1302 |
|
code. */ |
| 1303 |
|
|
| 1304 |
|
if (bits_set || min > 0 |
| 1305 |
|
#ifdef SUPPORT_JIT |
| 1306 |
|
|| (options & PCRE_STUDY_JIT_COMPILE) != 0 |
| 1307 |
|
#endif |
| 1308 |
|
) |
| 1309 |
|
{ |
| 1310 |
|
extra = (pcre_extra *)(pcre_malloc) |
| 1311 |
|
(sizeof(pcre_extra) + sizeof(pcre_study_data)); |
| 1312 |
|
if (extra == NULL) |
| 1313 |
|
{ |
| 1314 |
|
*errorptr = "failed to get memory"; |
| 1315 |
|
return NULL; |
| 1316 |
|
} |
| 1317 |
|
|
| 1318 |
if (*errorptr != NULL || (!bits_set && min < 0)) return NULL; |
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra)); |
| 1319 |
|
extra->flags = PCRE_EXTRA_STUDY_DATA; |
| 1320 |
|
extra->study_data = study; |
| 1321 |
|
|
| 1322 |
|
study->size = sizeof(pcre_study_data); |
| 1323 |
|
study->flags = 0; |
| 1324 |
|
|
| 1325 |
|
/* Set the start bits always, to avoid unset memory errors if the |
| 1326 |
|
study data is written to a file, but set the flag only if any of the bits |
| 1327 |
|
are set, to save time looking when none are. */ |
| 1328 |
|
|
| 1329 |
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in |
if (bits_set) |
| 1330 |
the latter, which is pointed to by the former, which may also get additional |
{ |
| 1331 |
data set later by the calling program. At the moment, the size of |
study->flags |= PCRE_STUDY_MAPPED; |
| 1332 |
pcre_study_data is fixed. We nevertheless save it in a field for returning via |
memcpy(study->start_bits, start_bits, sizeof(start_bits)); |
| 1333 |
the pcre_fullinfo() function so that if it becomes variable in the future, we |
} |
| 1334 |
don't have to change that code. */ |
else memset(study->start_bits, 0, 32 * sizeof(uschar)); |
| 1335 |
|
|
| 1336 |
extra = (pcre_extra *)(pcre_malloc) |
/* Always set the minlength value in the block, because the JIT compiler |
| 1337 |
(sizeof(pcre_extra) + sizeof(pcre_study_data)); |
makes use of it. However, don't set the bit unless the length is greater than |
| 1338 |
|
zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time |
| 1339 |
|
checking the zero case. */ |
| 1340 |
|
|
| 1341 |
if (extra == NULL) |
if (min > 0) |
| 1342 |
{ |
{ |
| 1343 |
*errorptr = "failed to get memory"; |
study->flags |= PCRE_STUDY_MINLEN; |
| 1344 |
return NULL; |
study->minlength = min; |
| 1345 |
|
} |
| 1346 |
|
else study->minlength = 0; |
| 1347 |
|
|
| 1348 |
|
/* If JIT support was compiled and requested, attempt the JIT compilation. |
| 1349 |
|
If no starting bytes were found, and the minimum length is zero, and JIT |
| 1350 |
|
compilation fails, abandon the extra block and return NULL. */ |
| 1351 |
|
|
| 1352 |
|
#ifdef SUPPORT_JIT |
| 1353 |
|
extra->executable_jit = NULL; |
| 1354 |
|
if ((options & PCRE_STUDY_JIT_COMPILE) != 0) _pcre_jit_compile(re, extra); |
| 1355 |
|
if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0) |
| 1356 |
|
{ |
| 1357 |
|
pcre_free_study(extra); |
| 1358 |
|
extra = NULL; |
| 1359 |
|
} |
| 1360 |
|
#endif |
| 1361 |
} |
} |
| 1362 |
|
|
| 1363 |
study = (pcre_study_data *)((char *)extra + sizeof(pcre_extra)); |
return extra; |
| 1364 |
extra->flags = PCRE_EXTRA_STUDY_DATA; |
} |
|
extra->study_data = study; |
|
| 1365 |
|
|
|
study->size = sizeof(pcre_study_data); |
|
|
study->flags = 0; |
|
| 1366 |
|
|
| 1367 |
if (bits_set) |
/************************************************* |
| 1368 |
{ |
* Free the study data * |
| 1369 |
study->flags |= PCRE_STUDY_MAPPED; |
*************************************************/ |
|
memcpy(study->start_bits, start_bits, sizeof(start_bits)); |
|
|
} |
|
| 1370 |
|
|
| 1371 |
if (min >= 0) |
/* This function frees the memory that was obtained by pcre_study(). |
|
{ |
|
|
study->flags |= PCRE_STUDY_MINLEN; |
|
|
study->minlength = min; |
|
|
} |
|
| 1372 |
|
|
| 1373 |
return extra; |
Argument: a pointer to the pcre_extra block |
| 1374 |
|
Returns: nothing |
| 1375 |
|
*/ |
| 1376 |
|
|
| 1377 |
|
PCRE_EXP_DEFN void |
| 1378 |
|
pcre_free_study(pcre_extra *extra) |
| 1379 |
|
{ |
| 1380 |
|
#ifdef SUPPORT_JIT |
| 1381 |
|
if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 && |
| 1382 |
|
extra->executable_jit != NULL) |
| 1383 |
|
_pcre_jit_free(extra->executable_jit); |
| 1384 |
|
#endif |
| 1385 |
|
pcre_free(extra); |
| 1386 |
} |
} |
| 1387 |
|
|
| 1388 |
/* End of pcre_study.c */ |
/* End of pcre_study.c */ |