| 1331 |
|
|
| 1332 |
|
|
| 1333 |
/************************************************* |
/************************************************* |
| 1334 |
* Find the fixed length of a pattern * |
* Find the fixed length of a branch * |
| 1335 |
*************************************************/ |
*************************************************/ |
| 1336 |
|
|
| 1337 |
/* Scan a pattern and compute the fixed length of subject that will match it, |
/* Scan a branch and compute the fixed length of subject that will match it, |
| 1338 |
if the length is fixed. This is needed for dealing with backward assertions. |
if the length is fixed. This is needed for dealing with backward assertions. |
| 1339 |
In UTF8 mode, the result is in characters rather than bytes. |
In UTF8 mode, the result is in characters rather than bytes. The branch is |
| 1340 |
|
temporarily terminated with OP_END when this function is called. |
| 1341 |
|
|
| 1342 |
|
This function is called when a backward assertion is encountered, so that if it |
| 1343 |
|
fails, the error message can point to the correct place in the pattern. |
| 1344 |
|
However, we cannot do this when the assertion contains subroutine calls, |
| 1345 |
|
because they can be forward references. We solve this by remembering this case |
| 1346 |
|
and doing the check at the end; a flag specifies which mode we are running in. |
| 1347 |
|
|
| 1348 |
Arguments: |
Arguments: |
| 1349 |
code points to the start of the pattern (the bracket) |
code points to the start of the pattern (the bracket) |
| 1350 |
options the compiling options |
options the compiling options |
| 1351 |
|
atend TRUE if called when the pattern is complete |
| 1352 |
|
cd the "compile data" structure |
| 1353 |
|
|
| 1354 |
Returns: the fixed length, or -1 if there is no fixed length, |
Returns: the fixed length, |
| 1355 |
|
or -1 if there is no fixed length, |
| 1356 |
or -2 if \C was encountered |
or -2 if \C was encountered |
| 1357 |
|
or -3 if an OP_RECURSE item was encountered and atend is FALSE |
| 1358 |
*/ |
*/ |
| 1359 |
|
|
| 1360 |
static int |
static int |
| 1361 |
find_fixedlength(uschar *code, int options) |
find_fixedlength(uschar *code, int options, BOOL atend, compile_data *cd) |
| 1362 |
{ |
{ |
| 1363 |
int length = -1; |
int length = -1; |
| 1364 |
|
|
| 1371 |
for (;;) |
for (;;) |
| 1372 |
{ |
{ |
| 1373 |
int d; |
int d; |
| 1374 |
|
uschar *ce, *cs; |
| 1375 |
register int op = *cc; |
register int op = *cc; |
| 1376 |
switch (op) |
switch (op) |
| 1377 |
{ |
{ |
| 1379 |
case OP_BRA: |
case OP_BRA: |
| 1380 |
case OP_ONCE: |
case OP_ONCE: |
| 1381 |
case OP_COND: |
case OP_COND: |
| 1382 |
d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options); |
d = find_fixedlength(cc + ((op == OP_CBRA)? 2:0), options, atend, cd); |
| 1383 |
if (d < 0) return d; |
if (d < 0) return d; |
| 1384 |
branchlength += d; |
branchlength += d; |
| 1385 |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
do cc += GET(cc, 1); while (*cc == OP_ALT); |
| 1401 |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| 1402 |
branchlength = 0; |
branchlength = 0; |
| 1403 |
break; |
break; |
| 1404 |
|
|
| 1405 |
|
/* A true recursion implies not fixed length, but a subroutine call may |
| 1406 |
|
be OK. If the subroutine is a forward reference, we can't deal with |
| 1407 |
|
it until the end of the pattern, so return -3. */ |
| 1408 |
|
|
| 1409 |
|
case OP_RECURSE: |
| 1410 |
|
if (!atend) return -3; |
| 1411 |
|
cs = ce = (uschar *)cd->start_code + GET(cc, 1); /* Start subpattern */ |
| 1412 |
|
do ce += GET(ce, 1); while (*ce == OP_ALT); /* End subpattern */ |
| 1413 |
|
if (cc > cs && cc < ce) return -1; /* Recursion */ |
| 1414 |
|
d = find_fixedlength(cs + 2, options, atend, cd); |
| 1415 |
|
if (d < 0) return d; |
| 1416 |
|
branchlength += d; |
| 1417 |
|
cc += 1 + LINK_SIZE; |
| 1418 |
|
break; |
| 1419 |
|
|
| 1420 |
/* Skip over assertive subpatterns */ |
/* Skip over assertive subpatterns */ |
| 1421 |
|
|
| 1545 |
|
|
| 1546 |
|
|
| 1547 |
/************************************************* |
/************************************************* |
| 1548 |
* Scan compiled regex for numbered bracket * |
* Scan compiled regex for specific bracket * |
| 1549 |
*************************************************/ |
*************************************************/ |
| 1550 |
|
|
| 1551 |
/* This little function scans through a compiled pattern until it finds a |
/* This little function scans through a compiled pattern until it finds a |
| 1552 |
capturing bracket with the given number. |
capturing bracket with the given number, or, if the number is negative, an |
| 1553 |
|
instance of OP_REVERSE for a lookbehind. |
| 1554 |
|
|
| 1555 |
Arguments: |
Arguments: |
| 1556 |
code points to start of expression |
code points to start of expression |
| 1557 |
utf8 TRUE in UTF-8 mode |
utf8 TRUE in UTF-8 mode |
| 1558 |
number the required bracket number |
number the required bracket number or negative to find a lookbehind |
| 1559 |
|
|
| 1560 |
Returns: pointer to the opcode for the bracket, or NULL if not found |
Returns: pointer to the opcode for the bracket, or NULL if not found |
| 1561 |
*/ |
*/ |
| 1573 |
the table is zero; the actual length is stored in the compiled code. */ |
the table is zero; the actual length is stored in the compiled code. */ |
| 1574 |
|
|
| 1575 |
if (c == OP_XCLASS) code += GET(code, 1); |
if (c == OP_XCLASS) code += GET(code, 1); |
| 1576 |
|
|
| 1577 |
|
/* Handle lookbehind (OP_REVERSE) */ |
| 1578 |
|
|
| 1579 |
|
else if (c == OP_REVERSE) |
| 1580 |
|
{ |
| 1581 |
|
if (number < 0) return (uschar *)code; |
| 1582 |
|
code += _pcre_OP_lengths[c]; |
| 1583 |
|
} |
| 1584 |
|
|
| 1585 |
/* Handle capturing bracket */ |
/* Handle capturing bracket */ |
| 1586 |
|
|
| 5849 |
|
|
| 5850 |
/* If lookbehind, check that this branch matches a fixed-length string, and |
/* If lookbehind, check that this branch matches a fixed-length string, and |
| 5851 |
put the length into the OP_REVERSE item. Temporarily mark the end of the |
put the length into the OP_REVERSE item. Temporarily mark the end of the |
| 5852 |
branch with OP_END. */ |
branch with OP_END. If the branch contains OP_RECURSE, the result is -3 |
| 5853 |
|
because there may be forward references that we can't check here. Set a |
| 5854 |
|
flag to cause another lookbehind check at the end. Why not do it all at the |
| 5855 |
|
end? Because common, erroneous checks are picked up here and the offset of |
| 5856 |
|
the problem can be shown. */ |
| 5857 |
|
|
| 5858 |
if (lookbehind) |
if (lookbehind) |
| 5859 |
{ |
{ |
| 5860 |
int fixed_length; |
int fixed_length; |
| 5861 |
*code = OP_END; |
*code = OP_END; |
| 5862 |
fixed_length = find_fixedlength(last_branch, options); |
fixed_length = find_fixedlength(last_branch, options, FALSE, cd); |
| 5863 |
DPRINTF(("fixed length = %d\n", fixed_length)); |
DPRINTF(("fixed length = %d\n", fixed_length)); |
| 5864 |
if (fixed_length < 0) |
if (fixed_length == -3) |
| 5865 |
|
{ |
| 5866 |
|
cd->check_lookbehind = TRUE; |
| 5867 |
|
} |
| 5868 |
|
else if (fixed_length < 0) |
| 5869 |
{ |
{ |
| 5870 |
*errorcodeptr = (fixed_length == -2)? ERR36 : ERR25; |
*errorcodeptr = (fixed_length == -2)? ERR36 : ERR25; |
| 5871 |
*ptrptr = ptr; |
*ptrptr = ptr; |
| 5872 |
return FALSE; |
return FALSE; |
| 5873 |
} |
} |
| 5874 |
PUT(reverse_count, 0, fixed_length); |
else { PUT(reverse_count, 0, fixed_length); } |
| 5875 |
} |
} |
| 5876 |
} |
} |
| 5877 |
|
|
| 6274 |
int firstbyte, reqbyte, newline; |
int firstbyte, reqbyte, newline; |
| 6275 |
int errorcode = 0; |
int errorcode = 0; |
| 6276 |
int skipatstart = 0; |
int skipatstart = 0; |
| 6277 |
#ifdef SUPPORT_UTF8 |
BOOL utf8 = (options & PCRE_UTF8) != 0; |
|
BOOL utf8; |
|
|
#endif |
|
| 6278 |
size_t size; |
size_t size; |
| 6279 |
uschar *code; |
uschar *code; |
| 6280 |
const uschar *codestart; |
const uschar *codestart; |
| 6371 |
/* Can't support UTF8 unless PCRE has been compiled to include the code. */ |
/* Can't support UTF8 unless PCRE has been compiled to include the code. */ |
| 6372 |
|
|
| 6373 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
|
utf8 = (options & PCRE_UTF8) != 0; |
|
| 6374 |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && |
| 6375 |
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0) |
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0) |
| 6376 |
{ |
{ |
| 6378 |
goto PCRE_EARLY_ERROR_RETURN2; |
goto PCRE_EARLY_ERROR_RETURN2; |
| 6379 |
} |
} |
| 6380 |
#else |
#else |
| 6381 |
if ((options & PCRE_UTF8) != 0) |
if (utf8) |
| 6382 |
{ |
{ |
| 6383 |
errorcode = ERR32; |
errorcode = ERR32; |
| 6384 |
goto PCRE_EARLY_ERROR_RETURN; |
goto PCRE_EARLY_ERROR_RETURN; |
| 6542 |
cd->hwm = cworkspace; |
cd->hwm = cworkspace; |
| 6543 |
cd->req_varyopt = 0; |
cd->req_varyopt = 0; |
| 6544 |
cd->had_accept = FALSE; |
cd->had_accept = FALSE; |
| 6545 |
|
cd->check_lookbehind = FALSE; |
| 6546 |
cd->open_caps = NULL; |
cd->open_caps = NULL; |
| 6547 |
|
|
| 6548 |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
| 6582 |
cd->hwm -= LINK_SIZE; |
cd->hwm -= LINK_SIZE; |
| 6583 |
offset = GET(cd->hwm, 0); |
offset = GET(cd->hwm, 0); |
| 6584 |
recno = GET(codestart, offset); |
recno = GET(codestart, offset); |
| 6585 |
groupptr = find_bracket(codestart, (re->options & PCRE_UTF8) != 0, recno); |
groupptr = find_bracket(codestart, utf8, recno); |
| 6586 |
if (groupptr == NULL) errorcode = ERR53; |
if (groupptr == NULL) errorcode = ERR53; |
| 6587 |
else PUT(((uschar *)codestart), offset, groupptr - codestart); |
else PUT(((uschar *)codestart), offset, groupptr - codestart); |
| 6588 |
} |
} |
| 6592 |
|
|
| 6593 |
if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15; |
if (errorcode == 0 && re->top_backref > re->top_bracket) errorcode = ERR15; |
| 6594 |
|
|
| 6595 |
|
/* If there were any lookbehind assertions that contained OP_RECURSE |
| 6596 |
|
(recursions or subroutine calls), a flag is set for them to be checked here, |
| 6597 |
|
because they may contain forward references. Actual recursions can't be fixed |
| 6598 |
|
length, but subroutine calls can. It is done like this so that those without |
| 6599 |
|
OP_RECURSE that are not fixed length get a diagnostic with a useful offset. The |
| 6600 |
|
exceptional ones forgo this. We scan the pattern to check that they are fixed |
| 6601 |
|
length, and set their lengths. */ |
| 6602 |
|
|
| 6603 |
|
if (cd->check_lookbehind) |
| 6604 |
|
{ |
| 6605 |
|
uschar *cc = (uschar *)codestart; |
| 6606 |
|
|
| 6607 |
|
/* Loop, searching for OP_REVERSE items, and process those that do not have |
| 6608 |
|
their length set. (Actually, it will also re-process any that have a length |
| 6609 |
|
of zero, but that is a pathological case, and it does no harm.) When we find |
| 6610 |
|
one, we temporarily terminate the branch it is in while we scan it. */ |
| 6611 |
|
|
| 6612 |
|
for (cc = (uschar *)find_bracket(codestart, utf8, -1); |
| 6613 |
|
cc != NULL; |
| 6614 |
|
cc = (uschar *)find_bracket(cc, utf8, -1)) |
| 6615 |
|
{ |
| 6616 |
|
if (GET(cc, 1) == 0) |
| 6617 |
|
{ |
| 6618 |
|
int fixed_length; |
| 6619 |
|
uschar *be = cc - 1 - LINK_SIZE + GET(cc, -LINK_SIZE); |
| 6620 |
|
int end_op = *be; |
| 6621 |
|
*be = OP_END; |
| 6622 |
|
fixed_length = find_fixedlength(cc, re->options, TRUE, cd); |
| 6623 |
|
*be = end_op; |
| 6624 |
|
DPRINTF(("fixed length = %d\n", fixed_length)); |
| 6625 |
|
if (fixed_length < 0) |
| 6626 |
|
{ |
| 6627 |
|
errorcode = (fixed_length == -2)? ERR36 : ERR25; |
| 6628 |
|
break; |
| 6629 |
|
} |
| 6630 |
|
PUT(cc, 1, fixed_length); |
| 6631 |
|
} |
| 6632 |
|
cc += 1 + LINK_SIZE; |
| 6633 |
|
} |
| 6634 |
|
} |
| 6635 |
|
|
| 6636 |
/* Failed to compile, or error while post-processing */ |
/* Failed to compile, or error while post-processing */ |
| 6637 |
|
|
| 6638 |
if (errorcode != 0) |
if (errorcode != 0) |