| 88 |
The same workspace is used during the second, actual compile phase for |
The same workspace is used during the second, actual compile phase for |
| 89 |
remembering forward references to groups so that they can be filled in at the |
remembering forward references to groups so that they can be filled in at the |
| 90 |
end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE |
end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE |
| 91 |
is 4 there is plenty of room. */ |
is 4 there is plenty of room for most patterns. However, the memory can get |
| 92 |
|
filled up by repetitions of forward references, for example patterns like |
| 93 |
|
/(?1){0,1999}(b)/, and one user did hit the limit. The code has been changed so |
| 94 |
|
that the workspace is expanded using malloc() in this situation. The value |
| 95 |
|
below is therefore a minimum, and we put a maximum on it for safety. The |
| 96 |
|
minimum is now also defined in terms of LINK_SIZE so that the use of malloc() |
| 97 |
|
kicks in at the same number of forward references in all cases. */ |
| 98 |
|
|
| 99 |
#define COMPILE_WORK_SIZE (4096) |
#define COMPILE_WORK_SIZE (2048*LINK_SIZE) |
| 100 |
|
#define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE) |
| 101 |
|
|
| 102 |
/* The overrun tests check for a slightly smaller size so that they detect the |
/* The overrun tests check for a slightly smaller size so that they detect the |
| 103 |
overrun before it actually does run off the end of the data block. */ |
overrun before it actually does run off the end of the data block. */ |
| 104 |
|
|
| 105 |
#define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100) |
#define WORK_SIZE_SAFETY_MARGIN (100) |
| 106 |
|
|
| 107 |
|
|
| 108 |
/* Table for handling escaped characters in the range '0'-'z'. Positive returns |
/* Table for handling escaped characters in the range '0'-'z'. Positive returns |
| 417 |
"this version of PCRE is not compiled with PCRE_UCP support\0" |
"this version of PCRE is not compiled with PCRE_UCP support\0" |
| 418 |
"\\c must be followed by an ASCII character\0" |
"\\c must be followed by an ASCII character\0" |
| 419 |
"\\k is not followed by a braced, angle-bracketed, or quoted name\0" |
"\\k is not followed by a braced, angle-bracketed, or quoted name\0" |
| 420 |
|
/* 70 */ |
| 421 |
|
"internal error: unknown opcode in find_fixedlength()\0" |
| 422 |
|
"\\N is not supported in a class\0" |
| 423 |
|
"too many forward references\0" |
| 424 |
; |
; |
| 425 |
|
|
| 426 |
/* Table to identify digits and hex digits. This is used when compiling |
/* Table to identify digits and hex digits. This is used when compiling |
| 589 |
|
|
| 590 |
|
|
| 591 |
/************************************************* |
/************************************************* |
| 592 |
|
* Expand the workspace * |
| 593 |
|
*************************************************/ |
| 594 |
|
|
| 595 |
|
/* This function is called during the second compiling phase, if the number of |
| 596 |
|
forward references fills the existing workspace, which is originally a block on |
| 597 |
|
the stack. A larger block is obtained from malloc() unless the ultimate limit |
| 598 |
|
has been reached or the increase will be rather small. |
| 599 |
|
|
| 600 |
|
Argument: pointer to the compile data block |
| 601 |
|
Returns: 0 if all went well, else an error number |
| 602 |
|
*/ |
| 603 |
|
|
| 604 |
|
static int |
| 605 |
|
expand_workspace(compile_data *cd) |
| 606 |
|
{ |
| 607 |
|
uschar *newspace; |
| 608 |
|
int newsize = cd->workspace_size * 2; |
| 609 |
|
|
| 610 |
|
if (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX; |
| 611 |
|
if (cd->workspace_size >= COMPILE_WORK_SIZE_MAX || |
| 612 |
|
newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN) |
| 613 |
|
return ERR72; |
| 614 |
|
|
| 615 |
|
newspace = (pcre_malloc)(newsize); |
| 616 |
|
if (newspace == NULL) return ERR21; |
| 617 |
|
|
| 618 |
|
memcpy(newspace, cd->start_workspace, cd->workspace_size); |
| 619 |
|
cd->hwm = (uschar *)newspace + (cd->hwm - cd->start_workspace); |
| 620 |
|
if (cd->workspace_size > COMPILE_WORK_SIZE) |
| 621 |
|
(pcre_free)((void *)cd->start_workspace); |
| 622 |
|
cd->start_workspace = newspace; |
| 623 |
|
cd->workspace_size = newsize; |
| 624 |
|
return 0; |
| 625 |
|
} |
| 626 |
|
|
| 627 |
|
|
| 628 |
|
|
| 629 |
|
/************************************************* |
| 630 |
* Check for counted repeat * |
* Check for counted repeat * |
| 631 |
*************************************************/ |
*************************************************/ |
| 632 |
|
|
| 1575 |
|
|
| 1576 |
Returns: the fixed length, |
Returns: the fixed length, |
| 1577 |
or -1 if there is no fixed length, |
or -1 if there is no fixed length, |
| 1578 |
or -2 if \C was encountered |
or -2 if \C was encountered (in UTF-8 mode only) |
| 1579 |
or -3 if an OP_RECURSE item was encountered and atend is FALSE |
or -3 if an OP_RECURSE item was encountered and atend is FALSE |
| 1580 |
|
or -4 if an unknown opcode was encountered (internal error) |
| 1581 |
*/ |
*/ |
| 1582 |
|
|
| 1583 |
static int |
static int |
| 1601 |
/* We only need to continue for OP_CBRA (normal capturing bracket) and |
/* We only need to continue for OP_CBRA (normal capturing bracket) and |
| 1602 |
OP_BRA (normal non-capturing bracket) because the other variants of these |
OP_BRA (normal non-capturing bracket) because the other variants of these |
| 1603 |
opcodes are all concerned with unlimited repeated groups, which of course |
opcodes are all concerned with unlimited repeated groups, which of course |
| 1604 |
are not of fixed length. They will cause a -1 response from the default |
are not of fixed length. */ |
|
case of this switch. */ |
|
| 1605 |
|
|
| 1606 |
case OP_CBRA: |
case OP_CBRA: |
| 1607 |
case OP_BRA: |
case OP_BRA: |
| 1615 |
cc += 1 + LINK_SIZE; |
cc += 1 + LINK_SIZE; |
| 1616 |
break; |
break; |
| 1617 |
|
|
| 1618 |
/* Reached end of a branch; if it's a ket it is the end of a nested |
/* Reached end of a branch; if it's a ket it is the end of a nested call. |
| 1619 |
call. If it's ALT it is an alternation in a nested call. If it is |
If it's ALT it is an alternation in a nested call. An ACCEPT is effectively |
| 1620 |
END it's the end of the outer call. All can be handled by the same code. |
an ALT. If it is END it's the end of the outer call. All can be handled by |
| 1621 |
Note that we must not include the OP_KETRxxx opcodes here, because they |
the same code. Note that we must not include the OP_KETRxxx opcodes here, |
| 1622 |
all imply an unlimited repeat. */ |
because they all imply an unlimited repeat. */ |
| 1623 |
|
|
| 1624 |
case OP_ALT: |
case OP_ALT: |
| 1625 |
case OP_KET: |
case OP_KET: |
| 1626 |
case OP_END: |
case OP_END: |
| 1627 |
|
case OP_ACCEPT: |
| 1628 |
|
case OP_ASSERT_ACCEPT: |
| 1629 |
if (length < 0) length = branchlength; |
if (length < 0) length = branchlength; |
| 1630 |
else if (length != branchlength) return -1; |
else if (length != branchlength) return -1; |
| 1631 |
if (*cc != OP_ALT) return length; |
if (*cc != OP_ALT) return length; |
| 1659 |
|
|
| 1660 |
/* Skip over things that don't match chars */ |
/* Skip over things that don't match chars */ |
| 1661 |
|
|
| 1662 |
case OP_REVERSE: |
case OP_MARK: |
| 1663 |
case OP_CREF: |
case OP_PRUNE_ARG: |
| 1664 |
case OP_NCREF: |
case OP_SKIP_ARG: |
| 1665 |
case OP_RREF: |
case OP_THEN_ARG: |
| 1666 |
case OP_NRREF: |
cc += cc[1] + _pcre_OP_lengths[*cc]; |
| 1667 |
case OP_DEF: |
break; |
| 1668 |
|
|
| 1669 |
case OP_CALLOUT: |
case OP_CALLOUT: |
|
case OP_SOD: |
|
|
case OP_SOM: |
|
|
case OP_SET_SOM: |
|
|
case OP_EOD: |
|
|
case OP_EODN: |
|
| 1670 |
case OP_CIRC: |
case OP_CIRC: |
| 1671 |
case OP_CIRCM: |
case OP_CIRCM: |
| 1672 |
|
case OP_CLOSE: |
| 1673 |
|
case OP_COMMIT: |
| 1674 |
|
case OP_CREF: |
| 1675 |
|
case OP_DEF: |
| 1676 |
case OP_DOLL: |
case OP_DOLL: |
| 1677 |
case OP_DOLLM: |
case OP_DOLLM: |
| 1678 |
|
case OP_EOD: |
| 1679 |
|
case OP_EODN: |
| 1680 |
|
case OP_FAIL: |
| 1681 |
|
case OP_NCREF: |
| 1682 |
|
case OP_NRREF: |
| 1683 |
case OP_NOT_WORD_BOUNDARY: |
case OP_NOT_WORD_BOUNDARY: |
| 1684 |
|
case OP_PRUNE: |
| 1685 |
|
case OP_REVERSE: |
| 1686 |
|
case OP_RREF: |
| 1687 |
|
case OP_SET_SOM: |
| 1688 |
|
case OP_SKIP: |
| 1689 |
|
case OP_SOD: |
| 1690 |
|
case OP_SOM: |
| 1691 |
|
case OP_THEN: |
| 1692 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
| 1693 |
cc += _pcre_OP_lengths[*cc]; |
cc += _pcre_OP_lengths[*cc]; |
| 1694 |
break; |
break; |
| 1710 |
need to skip over a multibyte character in UTF8 mode. */ |
need to skip over a multibyte character in UTF8 mode. */ |
| 1711 |
|
|
| 1712 |
case OP_EXACT: |
case OP_EXACT: |
| 1713 |
|
case OP_EXACTI: |
| 1714 |
|
case OP_NOTEXACT: |
| 1715 |
|
case OP_NOTEXACTI: |
| 1716 |
branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
| 1717 |
cc += 4; |
cc += 4; |
| 1718 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 1733 |
cc += 2; |
cc += 2; |
| 1734 |
/* Fall through */ |
/* Fall through */ |
| 1735 |
|
|
| 1736 |
|
case OP_HSPACE: |
| 1737 |
|
case OP_VSPACE: |
| 1738 |
|
case OP_NOT_HSPACE: |
| 1739 |
|
case OP_NOT_VSPACE: |
| 1740 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 1741 |
case OP_DIGIT: |
case OP_DIGIT: |
| 1742 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 1749 |
cc++; |
cc++; |
| 1750 |
break; |
break; |
| 1751 |
|
|
| 1752 |
/* The single-byte matcher isn't allowed */ |
/* The single-byte matcher isn't allowed. This only happens in UTF-8 mode; |
| 1753 |
|
otherwise \C is coded as OP_ALLANY. */ |
| 1754 |
|
|
| 1755 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 1756 |
return -2; |
return -2; |
| 1769 |
|
|
| 1770 |
switch (*cc) |
switch (*cc) |
| 1771 |
{ |
{ |
| 1772 |
|
case OP_CRPLUS: |
| 1773 |
|
case OP_CRMINPLUS: |
| 1774 |
case OP_CRSTAR: |
case OP_CRSTAR: |
| 1775 |
case OP_CRMINSTAR: |
case OP_CRMINSTAR: |
| 1776 |
case OP_CRQUERY: |
case OP_CRQUERY: |
| 1791 |
|
|
| 1792 |
/* Anything else is variable length */ |
/* Anything else is variable length */ |
| 1793 |
|
|
| 1794 |
default: |
case OP_ANYNL: |
| 1795 |
|
case OP_BRAMINZERO: |
| 1796 |
|
case OP_BRAPOS: |
| 1797 |
|
case OP_BRAPOSZERO: |
| 1798 |
|
case OP_BRAZERO: |
| 1799 |
|
case OP_CBRAPOS: |
| 1800 |
|
case OP_EXTUNI: |
| 1801 |
|
case OP_KETRMAX: |
| 1802 |
|
case OP_KETRMIN: |
| 1803 |
|
case OP_KETRPOS: |
| 1804 |
|
case OP_MINPLUS: |
| 1805 |
|
case OP_MINPLUSI: |
| 1806 |
|
case OP_MINQUERY: |
| 1807 |
|
case OP_MINQUERYI: |
| 1808 |
|
case OP_MINSTAR: |
| 1809 |
|
case OP_MINSTARI: |
| 1810 |
|
case OP_MINUPTO: |
| 1811 |
|
case OP_MINUPTOI: |
| 1812 |
|
case OP_NOTMINPLUS: |
| 1813 |
|
case OP_NOTMINPLUSI: |
| 1814 |
|
case OP_NOTMINQUERY: |
| 1815 |
|
case OP_NOTMINQUERYI: |
| 1816 |
|
case OP_NOTMINSTAR: |
| 1817 |
|
case OP_NOTMINSTARI: |
| 1818 |
|
case OP_NOTMINUPTO: |
| 1819 |
|
case OP_NOTMINUPTOI: |
| 1820 |
|
case OP_NOTPLUS: |
| 1821 |
|
case OP_NOTPLUSI: |
| 1822 |
|
case OP_NOTPOSPLUS: |
| 1823 |
|
case OP_NOTPOSPLUSI: |
| 1824 |
|
case OP_NOTPOSQUERY: |
| 1825 |
|
case OP_NOTPOSQUERYI: |
| 1826 |
|
case OP_NOTPOSSTAR: |
| 1827 |
|
case OP_NOTPOSSTARI: |
| 1828 |
|
case OP_NOTPOSUPTO: |
| 1829 |
|
case OP_NOTPOSUPTOI: |
| 1830 |
|
case OP_NOTQUERY: |
| 1831 |
|
case OP_NOTQUERYI: |
| 1832 |
|
case OP_NOTSTAR: |
| 1833 |
|
case OP_NOTSTARI: |
| 1834 |
|
case OP_NOTUPTO: |
| 1835 |
|
case OP_NOTUPTOI: |
| 1836 |
|
case OP_PLUS: |
| 1837 |
|
case OP_PLUSI: |
| 1838 |
|
case OP_POSPLUS: |
| 1839 |
|
case OP_POSPLUSI: |
| 1840 |
|
case OP_POSQUERY: |
| 1841 |
|
case OP_POSQUERYI: |
| 1842 |
|
case OP_POSSTAR: |
| 1843 |
|
case OP_POSSTARI: |
| 1844 |
|
case OP_POSUPTO: |
| 1845 |
|
case OP_POSUPTOI: |
| 1846 |
|
case OP_QUERY: |
| 1847 |
|
case OP_QUERYI: |
| 1848 |
|
case OP_REF: |
| 1849 |
|
case OP_REFI: |
| 1850 |
|
case OP_SBRA: |
| 1851 |
|
case OP_SBRAPOS: |
| 1852 |
|
case OP_SCBRA: |
| 1853 |
|
case OP_SCBRAPOS: |
| 1854 |
|
case OP_SCOND: |
| 1855 |
|
case OP_SKIPZERO: |
| 1856 |
|
case OP_STAR: |
| 1857 |
|
case OP_STARI: |
| 1858 |
|
case OP_TYPEMINPLUS: |
| 1859 |
|
case OP_TYPEMINQUERY: |
| 1860 |
|
case OP_TYPEMINSTAR: |
| 1861 |
|
case OP_TYPEMINUPTO: |
| 1862 |
|
case OP_TYPEPLUS: |
| 1863 |
|
case OP_TYPEPOSPLUS: |
| 1864 |
|
case OP_TYPEPOSQUERY: |
| 1865 |
|
case OP_TYPEPOSSTAR: |
| 1866 |
|
case OP_TYPEPOSUPTO: |
| 1867 |
|
case OP_TYPEQUERY: |
| 1868 |
|
case OP_TYPESTAR: |
| 1869 |
|
case OP_TYPEUPTO: |
| 1870 |
|
case OP_UPTO: |
| 1871 |
|
case OP_UPTOI: |
| 1872 |
return -1; |
return -1; |
| 1873 |
|
|
| 1874 |
|
/* Catch unrecognized opcodes so that when new ones are added they |
| 1875 |
|
are not forgotten, as has happened in the past. */ |
| 1876 |
|
|
| 1877 |
|
default: |
| 1878 |
|
return -4; |
| 1879 |
} |
} |
| 1880 |
} |
} |
| 1881 |
/* Control never gets here */ |
/* Control never gets here */ |
| 3377 |
#ifdef PCRE_DEBUG |
#ifdef PCRE_DEBUG |
| 3378 |
if (code > cd->hwm) cd->hwm = code; /* High water info */ |
if (code > cd->hwm) cd->hwm = code; /* High water info */ |
| 3379 |
#endif |
#endif |
| 3380 |
if (code > cd->start_workspace + WORK_SIZE_CHECK) /* Check for overrun */ |
if (code > cd->start_workspace + cd->workspace_size - |
| 3381 |
|
WORK_SIZE_SAFETY_MARGIN) /* Check for overrun */ |
| 3382 |
{ |
{ |
| 3383 |
*errorcodeptr = ERR52; |
*errorcodeptr = ERR52; |
| 3384 |
goto FAILED; |
goto FAILED; |
| 3401 |
} |
} |
| 3402 |
|
|
| 3403 |
*lengthptr += (int)(code - last_code); |
*lengthptr += (int)(code - last_code); |
| 3404 |
DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c)); |
DPRINTF(("length=%d added %d c=%c\n", *lengthptr, (int)(code - last_code), |
| 3405 |
|
c)); |
| 3406 |
|
|
| 3407 |
/* If "previous" is set and it is not at the start of the work space, move |
/* If "previous" is set and it is not at the start of the work space, move |
| 3408 |
it back to there, in order to avoid filling up the work space. Otherwise, |
it back to there, in order to avoid filling up the work space. Otherwise, |
| 3428 |
/* In the real compile phase, just check the workspace used by the forward |
/* In the real compile phase, just check the workspace used by the forward |
| 3429 |
reference list. */ |
reference list. */ |
| 3430 |
|
|
| 3431 |
else if (cd->hwm > cd->start_workspace + WORK_SIZE_CHECK) |
else if (cd->hwm > cd->start_workspace + cd->workspace_size - |
| 3432 |
|
WORK_SIZE_SAFETY_MARGIN) |
| 3433 |
{ |
{ |
| 3434 |
*errorcodeptr = ERR52; |
*errorcodeptr = ERR52; |
| 3435 |
goto FAILED; |
goto FAILED; |
| 3683 |
|
|
| 3684 |
if (lengthptr != NULL) |
if (lengthptr != NULL) |
| 3685 |
{ |
{ |
| 3686 |
*lengthptr += class_utf8data - class_utf8data_base; |
*lengthptr += (int)(class_utf8data - class_utf8data_base); |
| 3687 |
class_utf8data = class_utf8data_base; |
class_utf8data = class_utf8data_base; |
| 3688 |
} |
} |
| 3689 |
|
|
| 3819 |
if (*errorcodeptr != 0) goto FAILED; |
if (*errorcodeptr != 0) goto FAILED; |
| 3820 |
|
|
| 3821 |
if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ |
if (-c == ESC_b) c = CHAR_BS; /* \b is backspace in a class */ |
| 3822 |
|
else if (-c == ESC_N) /* \N is not supported in a class */ |
| 3823 |
|
{ |
| 3824 |
|
*errorcodeptr = ERR71; |
| 3825 |
|
goto FAILED; |
| 3826 |
|
} |
| 3827 |
else if (-c == ESC_Q) /* Handle start of quoted string */ |
else if (-c == ESC_Q) /* Handle start of quoted string */ |
| 3828 |
{ |
{ |
| 3829 |
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) |
if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E) |
| 4382 |
|
|
| 4383 |
/* Now fill in the complete length of the item */ |
/* Now fill in the complete length of the item */ |
| 4384 |
|
|
| 4385 |
PUT(previous, 1, code - previous); |
PUT(previous, 1, (int)(code - previous)); |
| 4386 |
break; /* End of class handling */ |
break; /* End of class handling */ |
| 4387 |
} |
} |
| 4388 |
#endif |
#endif |
| 4524 |
{ |
{ |
| 4525 |
uschar *lastchar = code - 1; |
uschar *lastchar = code - 1; |
| 4526 |
while((*lastchar & 0xc0) == 0x80) lastchar--; |
while((*lastchar & 0xc0) == 0x80) lastchar--; |
| 4527 |
c = code - lastchar; /* Length of UTF-8 character */ |
c = (int)(code - lastchar); /* Length of UTF-8 character */ |
| 4528 |
memcpy(utf8_char, lastchar, c); /* Save the char */ |
memcpy(utf8_char, lastchar, c); /* Save the char */ |
| 4529 |
c |= 0x80; /* Flag c as a length */ |
c |= 0x80; /* Flag c as a length */ |
| 4530 |
} |
} |
| 4931 |
*lengthptr += delta; |
*lengthptr += delta; |
| 4932 |
} |
} |
| 4933 |
|
|
| 4934 |
/* This is compiling for real */ |
/* This is compiling for real. If there is a set first byte for |
| 4935 |
|
the group, and we have not yet set a "required byte", set it. Make |
| 4936 |
|
sure there is enough workspace for copying forward references before |
| 4937 |
|
doing the copy. */ |
| 4938 |
|
|
| 4939 |
else |
else |
| 4940 |
{ |
{ |
| 4941 |
if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte; |
if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte; |
| 4942 |
|
|
| 4943 |
for (i = 1; i < repeat_min; i++) |
for (i = 1; i < repeat_min; i++) |
| 4944 |
{ |
{ |
| 4945 |
uschar *hc; |
uschar *hc; |
| 4946 |
uschar *this_hwm = cd->hwm; |
uschar *this_hwm = cd->hwm; |
| 4947 |
memcpy(code, previous, len); |
memcpy(code, previous, len); |
| 4948 |
|
|
| 4949 |
|
while (cd->hwm > cd->start_workspace + cd->workspace_size - |
| 4950 |
|
WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm)) |
| 4951 |
|
{ |
| 4952 |
|
int save_offset = save_hwm - cd->start_workspace; |
| 4953 |
|
int this_offset = this_hwm - cd->start_workspace; |
| 4954 |
|
*errorcodeptr = expand_workspace(cd); |
| 4955 |
|
if (*errorcodeptr != 0) goto FAILED; |
| 4956 |
|
save_hwm = (uschar *)cd->start_workspace + save_offset; |
| 4957 |
|
this_hwm = (uschar *)cd->start_workspace + this_offset; |
| 4958 |
|
} |
| 4959 |
|
|
| 4960 |
for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) |
for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) |
| 4961 |
{ |
{ |
| 4962 |
PUT(cd->hwm, 0, GET(hc, 0) + len); |
PUT(cd->hwm, 0, GET(hc, 0) + len); |
| 5024 |
} |
} |
| 5025 |
|
|
| 5026 |
memcpy(code, previous, len); |
memcpy(code, previous, len); |
| 5027 |
|
|
| 5028 |
|
/* Ensure there is enough workspace for forward references before |
| 5029 |
|
copying them. */ |
| 5030 |
|
|
| 5031 |
|
while (cd->hwm > cd->start_workspace + cd->workspace_size - |
| 5032 |
|
WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm)) |
| 5033 |
|
{ |
| 5034 |
|
int save_offset = save_hwm - cd->start_workspace; |
| 5035 |
|
int this_offset = this_hwm - cd->start_workspace; |
| 5036 |
|
*errorcodeptr = expand_workspace(cd); |
| 5037 |
|
if (*errorcodeptr != 0) goto FAILED; |
| 5038 |
|
save_hwm = (uschar *)cd->start_workspace + save_offset; |
| 5039 |
|
this_hwm = (uschar *)cd->start_workspace + this_offset; |
| 5040 |
|
} |
| 5041 |
|
|
| 5042 |
for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) |
for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE) |
| 5043 |
{ |
{ |
| 5044 |
PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); |
PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1)); |
| 5070 |
behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to |
behaviour of (?:xx)++ is the same as (?>xx)++ and this saves having to |
| 5071 |
deal with possessive ONCEs specially. |
deal with possessive ONCEs specially. |
| 5072 |
|
|
| 5073 |
Otherwise, if the quantifier was possessive, we convert the BRA code to |
Otherwise, when we are doing the actual compile phase, check to see |
| 5074 |
the POS form, and the KET code to KETRPOS. (It turns out to be convenient |
whether this group is one that could match an empty string. If so, |
| 5075 |
at runtime to detect this kind of subpattern at both the start and at the |
convert the initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so |
| 5076 |
end.) The use of special opcodes makes it possible to reduce greatly the |
that runtime checking can be done. [This check is also applied to ONCE |
| 5077 |
stack usage in pcre_exec(). If the group is preceded by OP_BRAZERO, |
groups at runtime, but in a different way.] |
| 5078 |
convert this to OP_BRAPOSZERO. Then cancel the possessive flag so that |
|
| 5079 |
the default action below, of wrapping everything inside atomic brackets, |
Then, if the quantifier was possessive and the bracket is not a |
| 5080 |
does not happen. |
conditional, we convert the BRA code to the POS form, and the KET code to |
| 5081 |
|
KETRPOS. (It turns out to be convenient at runtime to detect this kind of |
| 5082 |
Then, when we are doing the actual compile phase, check to see whether |
subpattern at both the start and at the end.) The use of special opcodes |
| 5083 |
this group is one that could match an empty string. If so, convert the |
makes it possible to reduce greatly the stack usage in pcre_exec(). If |
| 5084 |
initial operator to the S form (e.g. OP_BRA -> OP_SBRA) so that runtime |
the group is preceded by OP_BRAZERO, convert this to OP_BRAPOSZERO. |
| 5085 |
checking can be done. [This check is also applied to ONCE groups at |
|
| 5086 |
runtime, but in a different way.] */ |
Then, if the minimum number of matches is 1 or 0, cancel the possessive |
| 5087 |
|
flag so that the default action below, of wrapping everything inside |
| 5088 |
|
atomic brackets, does not happen. When the minimum is greater than 1, |
| 5089 |
|
there will be earlier copies of the group, and so we still have to wrap |
| 5090 |
|
the whole thing. */ |
| 5091 |
|
|
| 5092 |
else |
else |
| 5093 |
{ |
{ |
| 5094 |
uschar *ketcode = code - 1 - LINK_SIZE; |
uschar *ketcode = code - 1 - LINK_SIZE; |
| 5095 |
uschar *bracode = ketcode - GET(ketcode, 1); |
uschar *bracode = ketcode - GET(ketcode, 1); |
| 5096 |
|
|
| 5097 |
|
/* Convert possessive ONCE brackets to non-capturing */ |
| 5098 |
|
|
| 5099 |
if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && |
if ((*bracode == OP_ONCE || *bracode == OP_ONCE_NC) && |
| 5100 |
possessive_quantifier) *bracode = OP_BRA; |
possessive_quantifier) *bracode = OP_BRA; |
| 5101 |
|
|
| 5102 |
|
/* For non-possessive ONCE brackets, all we need to do is to |
| 5103 |
|
set the KET. */ |
| 5104 |
|
|
| 5105 |
if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) |
if (*bracode == OP_ONCE || *bracode == OP_ONCE_NC) |
| 5106 |
*ketcode = OP_KETRMAX + repeat_type; |
*ketcode = OP_KETRMAX + repeat_type; |
| 5107 |
|
|
| 5108 |
|
/* Handle non-ONCE brackets and possessive ONCEs (which have been |
| 5109 |
|
converted to non-capturing above). */ |
| 5110 |
|
|
| 5111 |
else |
else |
| 5112 |
{ |
{ |
| 5113 |
if (possessive_quantifier) |
/* In the compile phase, check for empty string matching. */ |
|
{ |
|
|
*bracode += 1; /* Switch to xxxPOS opcodes */ |
|
|
*ketcode = OP_KETRPOS; |
|
|
if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; |
|
|
possessive_quantifier = FALSE; |
|
|
} |
|
|
else *ketcode = OP_KETRMAX + repeat_type; |
|
| 5114 |
|
|
| 5115 |
if (lengthptr == NULL) |
if (lengthptr == NULL) |
| 5116 |
{ |
{ |
| 5126 |
} |
} |
| 5127 |
while (*scode == OP_ALT); |
while (*scode == OP_ALT); |
| 5128 |
} |
} |
| 5129 |
|
|
| 5130 |
|
/* Handle possessive quantifiers. */ |
| 5131 |
|
|
| 5132 |
|
if (possessive_quantifier) |
| 5133 |
|
{ |
| 5134 |
|
/* For COND brackets, we wrap the whole thing in a possessively |
| 5135 |
|
repeated non-capturing bracket, because we have not invented POS |
| 5136 |
|
versions of the COND opcodes. Because we are moving code along, we |
| 5137 |
|
must ensure that any pending recursive references are updated. */ |
| 5138 |
|
|
| 5139 |
|
if (*bracode == OP_COND || *bracode == OP_SCOND) |
| 5140 |
|
{ |
| 5141 |
|
int nlen = (int)(code - bracode); |
| 5142 |
|
*code = OP_END; |
| 5143 |
|
adjust_recurse(bracode, 1 + LINK_SIZE, utf8, cd, save_hwm); |
| 5144 |
|
memmove(bracode + 1+LINK_SIZE, bracode, nlen); |
| 5145 |
|
code += 1 + LINK_SIZE; |
| 5146 |
|
nlen += 1 + LINK_SIZE; |
| 5147 |
|
*bracode = OP_BRAPOS; |
| 5148 |
|
*code++ = OP_KETRPOS; |
| 5149 |
|
PUTINC(code, 0, nlen); |
| 5150 |
|
PUT(bracode, 1, nlen); |
| 5151 |
|
} |
| 5152 |
|
|
| 5153 |
|
/* For non-COND brackets, we modify the BRA code and use KETRPOS. */ |
| 5154 |
|
|
| 5155 |
|
else |
| 5156 |
|
{ |
| 5157 |
|
*bracode += 1; /* Switch to xxxPOS opcodes */ |
| 5158 |
|
*ketcode = OP_KETRPOS; |
| 5159 |
|
} |
| 5160 |
|
|
| 5161 |
|
/* If the minimum is zero, mark it as possessive, then unset the |
| 5162 |
|
possessive flag when the minimum is 0 or 1. */ |
| 5163 |
|
|
| 5164 |
|
if (brazeroptr != NULL) *brazeroptr = OP_BRAPOSZERO; |
| 5165 |
|
if (repeat_min < 2) possessive_quantifier = FALSE; |
| 5166 |
|
} |
| 5167 |
|
|
| 5168 |
|
/* Non-possessive quantifier */ |
| 5169 |
|
|
| 5170 |
|
else *ketcode = OP_KETRMAX + repeat_type; |
| 5171 |
} |
} |
| 5172 |
} |
} |
| 5173 |
} |
} |
| 5194 |
notation is just syntactic sugar, taken from Sun's Java package, but the |
notation is just syntactic sugar, taken from Sun's Java package, but the |
| 5195 |
special opcodes can optimize it. |
special opcodes can optimize it. |
| 5196 |
|
|
| 5197 |
Possessively repeated subpatterns have already been handled in the code |
Some (but not all) possessively repeated subpatterns have already been |
| 5198 |
just above, so possessive_quantifier is always FALSE for them at this |
completely handled in the code just above. For them, possessive_quantifier |
| 5199 |
stage. |
is always FALSE at this stage. |
| 5200 |
|
|
| 5201 |
Note that the repeated item starts at tempcode, not at previous, which |
Note that the repeated item starts at tempcode, not at previous, which |
| 5202 |
might be the first part of a string whose (former) last char we repeated. |
might be the first part of a string whose (former) last char we repeated. |
| 5686 |
|
|
| 5687 |
/* ------------------------------------------------------------ */ |
/* ------------------------------------------------------------ */ |
| 5688 |
case CHAR_C: /* Callout - may be followed by digits; */ |
case CHAR_C: /* Callout - may be followed by digits; */ |
| 5689 |
previous_callout = code; /* Save for later completion */ |
previous_callout = code; /* Save for later completion */ |
| 5690 |
after_manual_callout = 1; /* Skip one item before completing */ |
after_manual_callout = 1; /* Skip one item before completing */ |
| 5691 |
*code++ = OP_CALLOUT; |
*code++ = OP_CALLOUT; |
| 5692 |
{ |
{ |
| 5693 |
int n = 0; |
int n = 0; |
| 6058 |
of the group. Then remember the forward reference. */ |
of the group. Then remember the forward reference. */ |
| 6059 |
|
|
| 6060 |
called = cd->start_code + recno; |
called = cd->start_code + recno; |
| 6061 |
|
if (cd->hwm >= cd->start_workspace + cd->workspace_size - |
| 6062 |
|
WORK_SIZE_SAFETY_MARGIN) |
| 6063 |
|
{ |
| 6064 |
|
*errorcodeptr = expand_workspace(cd); |
| 6065 |
|
if (*errorcodeptr != 0) goto FAILED; |
| 6066 |
|
} |
| 6067 |
PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code)); |
PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code)); |
| 6068 |
} |
} |
| 6069 |
|
|
| 6084 |
} |
} |
| 6085 |
} |
} |
| 6086 |
|
|
| 6087 |
/* Insert the recursion/subroutine item. */ |
/* Insert the recursion/subroutine item. It does not have a set first |
| 6088 |
|
byte (relevant if it is repeated, because it will then be wrapped |
| 6089 |
|
with ONCE brackets). */ |
| 6090 |
|
|
| 6091 |
*code = OP_RECURSE; |
*code = OP_RECURSE; |
| 6092 |
PUT(code, 1, (int)(called - cd->start_code)); |
PUT(code, 1, (int)(called - cd->start_code)); |
| 6093 |
code += 1 + LINK_SIZE; |
code += 1 + LINK_SIZE; |
| 6094 |
|
groupsetfirstbyte = FALSE; |
| 6095 |
} |
} |
| 6096 |
|
|
| 6097 |
/* Can't determine a first byte now */ |
/* Can't determine a first byte now */ |
| 6573 |
} |
} |
| 6574 |
else |
else |
| 6575 |
#endif |
#endif |
| 6576 |
|
/* In non-UTF-8 mode, we turn \C into OP_ALLANY instead of OP_ANYBYTE |
| 6577 |
|
so that it works in DFA mode and in lookbehinds. */ |
| 6578 |
|
|
| 6579 |
{ |
{ |
| 6580 |
previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; |
previous = (-c > ESC_b && -c < ESC_Z)? code : NULL; |
| 6581 |
*code++ = -c; |
*code++ = (!utf8 && c == -ESC_C)? OP_ALLANY : -c; |
| 6582 |
} |
} |
| 6583 |
} |
} |
| 6584 |
continue; |
continue; |
| 6870 |
} |
} |
| 6871 |
else if (fixed_length < 0) |
else if (fixed_length < 0) |
| 6872 |
{ |
{ |
| 6873 |
*errorcodeptr = (fixed_length == -2)? ERR36 : ERR25; |
*errorcodeptr = (fixed_length == -2)? ERR36 : |
| 6874 |
|
(fixed_length == -4)? ERR70: ERR25; |
| 6875 |
*ptrptr = ptr; |
*ptrptr = ptr; |
| 6876 |
return FALSE; |
return FALSE; |
| 6877 |
} |
} |
| 7314 |
computing the amount of memory that is needed. Compiled items are thrown away |
computing the amount of memory that is needed. Compiled items are thrown away |
| 7315 |
as soon as possible, so that a fairly large buffer should be sufficient for |
as soon as possible, so that a fairly large buffer should be sufficient for |
| 7316 |
this purpose. The same space is used in the second phase for remembering where |
this purpose. The same space is used in the second phase for remembering where |
| 7317 |
to fill in forward references to subpatterns. */ |
to fill in forward references to subpatterns. That may overflow, in which case |
| 7318 |
|
new memory is obtained from malloc(). */ |
| 7319 |
|
|
| 7320 |
uschar cworkspace[COMPILE_WORK_SIZE]; |
uschar cworkspace[COMPILE_WORK_SIZE]; |
| 7321 |
|
|
| 7505 |
cd->names_found = 0; |
cd->names_found = 0; |
| 7506 |
cd->name_entry_size = 0; |
cd->name_entry_size = 0; |
| 7507 |
cd->name_table = NULL; |
cd->name_table = NULL; |
|
cd->start_workspace = cworkspace; |
|
| 7508 |
cd->start_code = cworkspace; |
cd->start_code = cworkspace; |
| 7509 |
cd->hwm = cworkspace; |
cd->hwm = cworkspace; |
| 7510 |
|
cd->start_workspace = cworkspace; |
| 7511 |
|
cd->workspace_size = COMPILE_WORK_SIZE; |
| 7512 |
cd->start_pattern = (const uschar *)pattern; |
cd->start_pattern = (const uschar *)pattern; |
| 7513 |
cd->end_pattern = (const uschar *)(pattern + strlen(pattern)); |
cd->end_pattern = (const uschar *)(pattern + strlen(pattern)); |
| 7514 |
cd->req_varyopt = 0; |
cd->req_varyopt = 0; |
| 7586 |
cd->name_table = (uschar *)re + re->name_table_offset; |
cd->name_table = (uschar *)re + re->name_table_offset; |
| 7587 |
codestart = cd->name_table + re->name_entry_size * re->name_count; |
codestart = cd->name_table + re->name_entry_size * re->name_count; |
| 7588 |
cd->start_code = codestart; |
cd->start_code = codestart; |
| 7589 |
cd->hwm = cworkspace; |
cd->hwm = (uschar *)(cd->start_workspace); |
| 7590 |
cd->req_varyopt = 0; |
cd->req_varyopt = 0; |
| 7591 |
cd->had_accept = FALSE; |
cd->had_accept = FALSE; |
| 7592 |
cd->check_lookbehind = FALSE; |
cd->check_lookbehind = FALSE; |
| 7620 |
if (code - codestart > length) errorcode = ERR23; |
if (code - codestart > length) errorcode = ERR23; |
| 7621 |
#endif |
#endif |
| 7622 |
|
|
| 7623 |
/* Fill in any forward references that are required. */ |
/* Fill in any forward references that are required. There may be repeated |
| 7624 |
|
references; optimize for them, as searching a large regex takes time. */ |
| 7625 |
|
|
| 7626 |
while (errorcode == 0 && cd->hwm > cworkspace) |
if (cd->hwm > cd->start_workspace) |
| 7627 |
{ |
{ |
| 7628 |
int offset, recno; |
int prev_recno = -1; |
| 7629 |
const uschar *groupptr; |
const uschar *groupptr = NULL; |
| 7630 |
cd->hwm -= LINK_SIZE; |
while (errorcode == 0 && cd->hwm > cd->start_workspace) |
| 7631 |
offset = GET(cd->hwm, 0); |
{ |
| 7632 |
recno = GET(codestart, offset); |
int offset, recno; |
| 7633 |
groupptr = _pcre_find_bracket(codestart, utf8, recno); |
cd->hwm -= LINK_SIZE; |
| 7634 |
if (groupptr == NULL) errorcode = ERR53; |
offset = GET(cd->hwm, 0); |
| 7635 |
else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart)); |
recno = GET(codestart, offset); |
| 7636 |
|
if (recno != prev_recno) |
| 7637 |
|
{ |
| 7638 |
|
groupptr = _pcre_find_bracket(codestart, utf8, recno); |
| 7639 |
|
prev_recno = recno; |
| 7640 |
|
} |
| 7641 |
|
if (groupptr == NULL) errorcode = ERR53; |
| 7642 |
|
else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart)); |
| 7643 |
|
} |
| 7644 |
} |
} |
| 7645 |
|
|
| 7646 |
|
/* If the workspace had to be expanded, free the new memory. */ |
| 7647 |
|
|
| 7648 |
|
if (cd->workspace_size > COMPILE_WORK_SIZE) |
| 7649 |
|
(pcre_free)((void *)cd->start_workspace); |
| 7650 |
|
|
| 7651 |
/* Give an error if there's a back reference to a non-existent capturing |
/* Give an error if there's a back reference to a non-existent capturing |
| 7652 |
subpattern. */ |
subpattern. */ |
| 7653 |
|
|
| 7686 |
DPRINTF(("fixed length = %d\n", fixed_length)); |
DPRINTF(("fixed length = %d\n", fixed_length)); |
| 7687 |
if (fixed_length < 0) |
if (fixed_length < 0) |
| 7688 |
{ |
{ |
| 7689 |
errorcode = (fixed_length == -2)? ERR36 : ERR25; |
errorcode = (fixed_length == -2)? ERR36 : |
| 7690 |
|
(fixed_length == -4)? ERR70 : ERR25; |
| 7691 |
break; |
break; |
| 7692 |
} |
} |
| 7693 |
PUT(cc, 1, fixed_length); |
PUT(cc, 1, fixed_length); |