| 1397 |
|
|
| 1398 |
/* This is called by several functions that scan a compiled expression looking |
/* This is called by several functions that scan a compiled expression looking |
| 1399 |
for a fixed first character, or an anchoring op code etc. It skips over things |
for a fixed first character, or an anchoring op code etc. It skips over things |
| 1400 |
that do not influence this. For some calls, a change of option is important. |
that do not influence this. For some calls, it makes sense to skip negative |
| 1401 |
For some calls, it makes sense to skip negative forward and all backward |
forward and all backward assertions, and also the \b assertion; for others it |
| 1402 |
assertions, and also the \b assertion; for others it does not. |
does not. |
| 1403 |
|
|
| 1404 |
Arguments: |
Arguments: |
| 1405 |
code pointer to the start of the group |
code pointer to the start of the group |
| 1419 |
{ |
{ |
| 1420 |
switch ((int)*code) |
switch ((int)*code) |
| 1421 |
{ |
{ |
|
case OP_OPT: |
|
|
if (optbit > 0 && ((int)code[1] & optbit) != (*options & optbit)) |
|
|
*options = (int)code[1]; |
|
|
code += 2; |
|
|
break; |
|
|
|
|
| 1422 |
case OP_ASSERT_NOT: |
case OP_ASSERT_NOT: |
| 1423 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
| 1424 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
| 1555 |
case OP_RREF: |
case OP_RREF: |
| 1556 |
case OP_NRREF: |
case OP_NRREF: |
| 1557 |
case OP_DEF: |
case OP_DEF: |
|
case OP_OPT: |
|
| 1558 |
case OP_CALLOUT: |
case OP_CALLOUT: |
| 1559 |
case OP_SOD: |
case OP_SOD: |
| 1560 |
case OP_SOM: |
case OP_SOM: |
| 1562 |
case OP_EOD: |
case OP_EOD: |
| 1563 |
case OP_EODN: |
case OP_EODN: |
| 1564 |
case OP_CIRC: |
case OP_CIRC: |
| 1565 |
|
case OP_CIRCM: |
| 1566 |
case OP_DOLL: |
case OP_DOLL: |
| 1567 |
|
case OP_DOLLM: |
| 1568 |
case OP_NOT_WORD_BOUNDARY: |
case OP_NOT_WORD_BOUNDARY: |
| 1569 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
| 1570 |
cc += _pcre_OP_lengths[*cc]; |
cc += _pcre_OP_lengths[*cc]; |
| 1573 |
/* Handle literal characters */ |
/* Handle literal characters */ |
| 1574 |
|
|
| 1575 |
case OP_CHAR: |
case OP_CHAR: |
| 1576 |
case OP_CHARNC: |
case OP_CHARI: |
| 1577 |
case OP_NOT: |
case OP_NOT: |
| 1578 |
|
case OP_NOTI: |
| 1579 |
branchlength++; |
branchlength++; |
| 1580 |
cc += 2; |
cc += 2; |
| 1581 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 1770 |
if (utf8) switch(c) |
if (utf8) switch(c) |
| 1771 |
{ |
{ |
| 1772 |
case OP_CHAR: |
case OP_CHAR: |
| 1773 |
case OP_CHARNC: |
case OP_CHARI: |
| 1774 |
case OP_EXACT: |
case OP_EXACT: |
| 1775 |
|
case OP_EXACTI: |
| 1776 |
case OP_UPTO: |
case OP_UPTO: |
| 1777 |
|
case OP_UPTOI: |
| 1778 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 1779 |
|
case OP_MINUPTOI: |
| 1780 |
case OP_POSUPTO: |
case OP_POSUPTO: |
| 1781 |
|
case OP_POSUPTOI: |
| 1782 |
case OP_STAR: |
case OP_STAR: |
| 1783 |
|
case OP_STARI: |
| 1784 |
case OP_MINSTAR: |
case OP_MINSTAR: |
| 1785 |
|
case OP_MINSTARI: |
| 1786 |
case OP_POSSTAR: |
case OP_POSSTAR: |
| 1787 |
|
case OP_POSSTARI: |
| 1788 |
case OP_PLUS: |
case OP_PLUS: |
| 1789 |
|
case OP_PLUSI: |
| 1790 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 1791 |
|
case OP_MINPLUSI: |
| 1792 |
case OP_POSPLUS: |
case OP_POSPLUS: |
| 1793 |
|
case OP_POSPLUSI: |
| 1794 |
case OP_QUERY: |
case OP_QUERY: |
| 1795 |
|
case OP_QUERYI: |
| 1796 |
case OP_MINQUERY: |
case OP_MINQUERY: |
| 1797 |
|
case OP_MINQUERYI: |
| 1798 |
case OP_POSQUERY: |
case OP_POSQUERY: |
| 1799 |
|
case OP_POSQUERYI: |
| 1800 |
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; |
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; |
| 1801 |
break; |
break; |
| 1802 |
} |
} |
| 1889 |
if (utf8) switch(c) |
if (utf8) switch(c) |
| 1890 |
{ |
{ |
| 1891 |
case OP_CHAR: |
case OP_CHAR: |
| 1892 |
case OP_CHARNC: |
case OP_CHARI: |
| 1893 |
case OP_EXACT: |
case OP_EXACT: |
| 1894 |
|
case OP_EXACTI: |
| 1895 |
case OP_UPTO: |
case OP_UPTO: |
| 1896 |
|
case OP_UPTOI: |
| 1897 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 1898 |
|
case OP_MINUPTOI: |
| 1899 |
case OP_POSUPTO: |
case OP_POSUPTO: |
| 1900 |
|
case OP_POSUPTOI: |
| 1901 |
case OP_STAR: |
case OP_STAR: |
| 1902 |
|
case OP_STARI: |
| 1903 |
case OP_MINSTAR: |
case OP_MINSTAR: |
| 1904 |
|
case OP_MINSTARI: |
| 1905 |
case OP_POSSTAR: |
case OP_POSSTAR: |
| 1906 |
|
case OP_POSSTARI: |
| 1907 |
case OP_PLUS: |
case OP_PLUS: |
| 1908 |
|
case OP_PLUSI: |
| 1909 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 1910 |
|
case OP_MINPLUSI: |
| 1911 |
case OP_POSPLUS: |
case OP_POSPLUS: |
| 1912 |
|
case OP_POSPLUSI: |
| 1913 |
case OP_QUERY: |
case OP_QUERY: |
| 1914 |
|
case OP_QUERYI: |
| 1915 |
case OP_MINQUERY: |
case OP_MINQUERY: |
| 1916 |
|
case OP_MINQUERYI: |
| 1917 |
case OP_POSQUERY: |
case OP_POSQUERY: |
| 1918 |
|
case OP_POSQUERYI: |
| 1919 |
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; |
if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f]; |
| 1920 |
break; |
break; |
| 1921 |
} |
} |
| 2093 |
case OP_ALLANY: |
case OP_ALLANY: |
| 2094 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 2095 |
case OP_CHAR: |
case OP_CHAR: |
| 2096 |
case OP_CHARNC: |
case OP_CHARI: |
| 2097 |
case OP_NOT: |
case OP_NOT: |
| 2098 |
|
case OP_NOTI: |
| 2099 |
case OP_PLUS: |
case OP_PLUS: |
| 2100 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 2101 |
case OP_POSPLUS: |
case OP_POSPLUS: |
| 2143 |
|
|
| 2144 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2145 |
case OP_STAR: |
case OP_STAR: |
| 2146 |
|
case OP_STARI: |
| 2147 |
case OP_MINSTAR: |
case OP_MINSTAR: |
| 2148 |
|
case OP_MINSTARI: |
| 2149 |
case OP_POSSTAR: |
case OP_POSSTAR: |
| 2150 |
|
case OP_POSSTARI: |
| 2151 |
case OP_QUERY: |
case OP_QUERY: |
| 2152 |
|
case OP_QUERYI: |
| 2153 |
case OP_MINQUERY: |
case OP_MINQUERY: |
| 2154 |
|
case OP_MINQUERYI: |
| 2155 |
case OP_POSQUERY: |
case OP_POSQUERY: |
| 2156 |
|
case OP_POSQUERYI: |
| 2157 |
if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f]; |
if (utf8 && code[1] >= 0xc0) code += _pcre_utf8_table4[code[1] & 0x3f]; |
| 2158 |
break; |
break; |
| 2159 |
|
|
| 2160 |
case OP_UPTO: |
case OP_UPTO: |
| 2161 |
|
case OP_UPTOI: |
| 2162 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 2163 |
|
case OP_MINUPTOI: |
| 2164 |
case OP_POSUPTO: |
case OP_POSUPTO: |
| 2165 |
|
case OP_POSUPTOI: |
| 2166 |
if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f]; |
if (utf8 && code[3] >= 0xc0) code += _pcre_utf8_table4[code[3] & 0x3f]; |
| 2167 |
break; |
break; |
| 2168 |
#endif |
#endif |
| 2654 |
#endif |
#endif |
| 2655 |
return c != next; |
return c != next; |
| 2656 |
|
|
| 2657 |
/* For CHARNC (caseless character) we must check the other case. If we have |
/* For CHARI (caseless character) we must check the other case. If we have |
| 2658 |
Unicode property support, we can use it to test the other case of |
Unicode property support, we can use it to test the other case of |
| 2659 |
high-valued characters. */ |
high-valued characters. */ |
| 2660 |
|
|
| 2661 |
case OP_CHARNC: |
case OP_CHARI: |
| 2662 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2663 |
GETCHARTEST(c, previous); |
GETCHARTEST(c, previous); |
| 2664 |
#else |
#else |
| 2681 |
#endif /* SUPPORT_UTF8 */ |
#endif /* SUPPORT_UTF8 */ |
| 2682 |
return (c != cd->fcc[next]); /* Non-UTF-8 mode */ |
return (c != cd->fcc[next]); /* Non-UTF-8 mode */ |
| 2683 |
|
|
| 2684 |
/* For OP_NOT, its data is always a single-byte character. */ |
/* For OP_NOT and OP_NOTI, the data is always a single-byte character. These |
| 2685 |
|
opcodes are not used for multi-byte characters, because they are coded using |
| 2686 |
|
an XCLASS instead. */ |
| 2687 |
|
|
| 2688 |
case OP_NOT: |
case OP_NOT: |
| 2689 |
|
return (c = *previous) == next; |
| 2690 |
|
|
| 2691 |
|
case OP_NOTI: |
| 2692 |
if ((c = *previous) == next) return TRUE; |
if ((c = *previous) == next) return TRUE; |
|
if ((options & PCRE_CASELESS) == 0) return FALSE; |
|
| 2693 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2694 |
if (utf8) |
if (utf8) |
| 2695 |
{ |
{ |
| 2794 |
switch(op_code) |
switch(op_code) |
| 2795 |
{ |
{ |
| 2796 |
case OP_CHAR: |
case OP_CHAR: |
| 2797 |
case OP_CHARNC: |
case OP_CHARI: |
| 2798 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2799 |
GETCHARTEST(c, previous); |
GETCHARTEST(c, previous); |
| 2800 |
#else |
#else |
| 3258 |
the setting of any following char as a first character. */ |
the setting of any following char as a first character. */ |
| 3259 |
|
|
| 3260 |
case CHAR_CIRCUMFLEX_ACCENT: |
case CHAR_CIRCUMFLEX_ACCENT: |
| 3261 |
|
previous = NULL; |
| 3262 |
if ((options & PCRE_MULTILINE) != 0) |
if ((options & PCRE_MULTILINE) != 0) |
| 3263 |
{ |
{ |
| 3264 |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
| 3265 |
|
*code++ = OP_CIRCM; |
| 3266 |
} |
} |
| 3267 |
previous = NULL; |
else *code++ = OP_CIRC; |
|
*code++ = OP_CIRC; |
|
| 3268 |
break; |
break; |
| 3269 |
|
|
| 3270 |
case CHAR_DOLLAR_SIGN: |
case CHAR_DOLLAR_SIGN: |
| 3271 |
previous = NULL; |
previous = NULL; |
| 3272 |
*code++ = OP_DOLL; |
*code++ = ((options & PCRE_MULTILINE) != 0)? OP_DOLLM : OP_DOLL; |
| 3273 |
break; |
break; |
| 3274 |
|
|
| 3275 |
/* There can never be a first char if '.' is first, whatever happens about |
/* There can never be a first char if '.' is first, whatever happens about |
| 4015 |
|
|
| 4016 |
In UTF-8 mode, we can optimize the negative case only if there were no |
In UTF-8 mode, we can optimize the negative case only if there were no |
| 4017 |
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR |
characters >= 128 because OP_NOT and the related opcodes like OP_NOTSTAR |
| 4018 |
operate on single-bytes only. This is an historical hangover. Maybe one day |
operate on single-byte characters only. This is an historical hangover. |
| 4019 |
we can tidy these opcodes to handle multi-byte characters. |
Maybe one day we can tidy these opcodes to handle multi-byte characters. |
| 4020 |
|
|
| 4021 |
The optimization throws away the bit map. We turn the item into a |
The optimization throws away the bit map. We turn the item into a |
| 4022 |
1-character OP_CHAR[NC] if it's positive, or OP_NOT if it's negative. Note |
1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative. |
| 4023 |
that OP_NOT does not support multibyte characters. In the positive case, it |
Note that OP_NOT[I] does not support multibyte characters. In the positive |
| 4024 |
can cause firstbyte to be set. Otherwise, there can be no first char if |
case, it can cause firstbyte to be set. Otherwise, there can be no first |
| 4025 |
this item is first, whatever repeat count may follow. In the case of |
char if this item is first, whatever repeat count may follow. In the case |
| 4026 |
reqbyte, save the previous value for reinstating. */ |
of reqbyte, save the previous value for reinstating. */ |
| 4027 |
|
|
| 4028 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 4029 |
if (class_charcount == 1 && !class_utf8 && |
if (class_charcount == 1 && !class_utf8 && |
| 4034 |
{ |
{ |
| 4035 |
zeroreqbyte = reqbyte; |
zeroreqbyte = reqbyte; |
| 4036 |
|
|
| 4037 |
/* The OP_NOT opcode works on one-byte characters only. */ |
/* The OP_NOT[I] opcodes work on one-byte characters only. */ |
| 4038 |
|
|
| 4039 |
if (negate_class) |
if (negate_class) |
| 4040 |
{ |
{ |
| 4041 |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
| 4042 |
zerofirstbyte = firstbyte; |
zerofirstbyte = firstbyte; |
| 4043 |
*code++ = OP_NOT; |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_NOTI: OP_NOT; |
| 4044 |
*code++ = class_lastchar; |
*code++ = class_lastchar; |
| 4045 |
break; |
break; |
| 4046 |
} |
} |
| 4198 |
the first thing in a branch because the x will have gone into firstbyte |
the first thing in a branch because the x will have gone into firstbyte |
| 4199 |
instead. */ |
instead. */ |
| 4200 |
|
|
| 4201 |
if (*previous == OP_CHAR || *previous == OP_CHARNC) |
if (*previous == OP_CHAR || *previous == OP_CHARI) |
| 4202 |
{ |
{ |
| 4203 |
|
op_type = (*previous == OP_CHAR)? 0 : OP_STARI - OP_STAR; |
| 4204 |
|
|
| 4205 |
/* Deal with UTF-8 characters that take up more than one byte. It's |
/* Deal with UTF-8 characters that take up more than one byte. It's |
| 4206 |
easier to write this out separately than try to macrify it. Use c to |
easier to write this out separately than try to macrify it. Use c to |
| 4207 |
hold the length of the character in bytes, plus 0x80 to flag that it's a |
hold the length of the character in bytes, plus 0x80 to flag that it's a |
| 4246 |
/* If previous was a single negated character ([^a] or similar), we use |
/* If previous was a single negated character ([^a] or similar), we use |
| 4247 |
one of the special opcodes, replacing it. The code is shared with single- |
one of the special opcodes, replacing it. The code is shared with single- |
| 4248 |
character repeats by setting op_type to add a suitable offset into |
character repeats by setting op_type to add a suitable offset into |
| 4249 |
repeat_type. We can also test for auto-possessification. OP_NOT is |
repeat_type. We can also test for auto-possessification. OP_NOT and OP_NOTI |
| 4250 |
currently used only for single-byte chars. */ |
are currently used only for single-byte chars. */ |
| 4251 |
|
|
| 4252 |
else if (*previous == OP_NOT) |
else if (*previous == OP_NOT || *previous == OP_NOTI) |
| 4253 |
{ |
{ |
| 4254 |
op_type = OP_NOTSTAR - OP_STAR; /* Use "not" opcodes */ |
op_type = ((*previous == OP_NOT)? OP_NOTSTAR : OP_NOTSTARI) - OP_STAR; |
| 4255 |
c = previous[1]; |
c = previous[1]; |
| 4256 |
if (!possessive_quantifier && |
if (!possessive_quantifier && |
| 4257 |
repeat_max < 0 && |
repeat_max < 0 && |
| 4448 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 4449 |
*previous == OP_XCLASS || |
*previous == OP_XCLASS || |
| 4450 |
#endif |
#endif |
| 4451 |
*previous == OP_REF) |
*previous == OP_REF || |
| 4452 |
|
*previous == OP_REFI) |
| 4453 |
{ |
{ |
| 4454 |
if (repeat_max == 0) |
if (repeat_max == 0) |
| 4455 |
{ |
{ |
| 4503 |
|
|
| 4504 |
/* If the maximum repeat count is unlimited, find the end of the bracket |
/* If the maximum repeat count is unlimited, find the end of the bracket |
| 4505 |
by scanning through from the start, and compute the offset back to it |
by scanning through from the start, and compute the offset back to it |
| 4506 |
from the current code pointer. There may be an OP_OPT setting following |
from the current code pointer. */ |
|
the final KET, so we can't find the end just by going back from the code |
|
|
pointer. */ |
|
| 4507 |
|
|
| 4508 |
if (repeat_max == -1) |
if (repeat_max == -1) |
| 4509 |
{ |
{ |
| 4803 |
case OP_QUERY: *tempcode = OP_POSQUERY; break; |
case OP_QUERY: *tempcode = OP_POSQUERY; break; |
| 4804 |
case OP_UPTO: *tempcode = OP_POSUPTO; break; |
case OP_UPTO: *tempcode = OP_POSUPTO; break; |
| 4805 |
|
|
| 4806 |
case OP_TYPESTAR: *tempcode = OP_TYPEPOSSTAR; break; |
case OP_STARI: *tempcode = OP_POSSTARI; break; |
| 4807 |
case OP_TYPEPLUS: *tempcode = OP_TYPEPOSPLUS; break; |
case OP_PLUSI: *tempcode = OP_POSPLUSI; break; |
| 4808 |
case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break; |
case OP_QUERYI: *tempcode = OP_POSQUERYI; break; |
| 4809 |
case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break; |
case OP_UPTOI: *tempcode = OP_POSUPTOI; break; |
| 4810 |
|
|
| 4811 |
case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break; |
case OP_NOTSTAR: *tempcode = OP_NOTPOSSTAR; break; |
| 4812 |
case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break; |
case OP_NOTPLUS: *tempcode = OP_NOTPOSPLUS; break; |
| 4813 |
case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break; |
case OP_NOTQUERY: *tempcode = OP_NOTPOSQUERY; break; |
| 4814 |
case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break; |
case OP_NOTUPTO: *tempcode = OP_NOTPOSUPTO; break; |
| 4815 |
|
|
| 4816 |
|
case OP_NOTSTARI: *tempcode = OP_NOTPOSSTARI; break; |
| 4817 |
|
case OP_NOTPLUSI: *tempcode = OP_NOTPOSPLUSI; break; |
| 4818 |
|
case OP_NOTQUERYI: *tempcode = OP_NOTPOSQUERYI; break; |
| 4819 |
|
case OP_NOTUPTOI: *tempcode = OP_NOTPOSUPTOI; break; |
| 4820 |
|
|
| 4821 |
|
case OP_TYPESTAR: *tempcode = OP_TYPEPOSSTAR; break; |
| 4822 |
|
case OP_TYPEPLUS: *tempcode = OP_TYPEPOSPLUS; break; |
| 4823 |
|
case OP_TYPEQUERY: *tempcode = OP_TYPEPOSQUERY; break; |
| 4824 |
|
case OP_TYPEUPTO: *tempcode = OP_TYPEPOSUPTO; break; |
| 4825 |
|
|
| 4826 |
/* Because we are moving code along, we must ensure that any |
/* Because we are moving code along, we must ensure that any |
| 4827 |
pending recursive references are updated. */ |
pending recursive references are updated. */ |
| 4828 |
|
|
| 5727 |
} |
} |
| 5728 |
else |
else |
| 5729 |
{ |
{ |
|
if ((options & PCRE_IMS) != (newoptions & PCRE_IMS)) |
|
|
{ |
|
|
*code++ = OP_OPT; |
|
|
*code++ = newoptions & PCRE_IMS; |
|
|
} |
|
| 5730 |
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); |
greedy_default = ((newoptions & PCRE_UNGREEDY) != 0); |
| 5731 |
greedy_non_default = greedy_default ^ 1; |
greedy_non_default = greedy_default ^ 1; |
| 5732 |
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; |
req_caseopt = ((newoptions & PCRE_CASELESS) != 0)? REQ_CASELESS : 0; |
| 6064 |
HANDLE_REFERENCE: /* Come here from named backref handling */ |
HANDLE_REFERENCE: /* Come here from named backref handling */ |
| 6065 |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
| 6066 |
previous = code; |
previous = code; |
| 6067 |
*code++ = OP_REF; |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF; |
| 6068 |
PUT2INC(code, 0, recno); |
PUT2INC(code, 0, recno); |
| 6069 |
cd->backref_map |= (recno < 32)? (1 << recno) : 1; |
cd->backref_map |= (recno < 32)? (1 << recno) : 1; |
| 6070 |
if (recno > cd->top_backref) cd->top_backref = recno; |
if (recno > cd->top_backref) cd->top_backref = recno; |
| 6172 |
|
|
| 6173 |
ONE_CHAR: |
ONE_CHAR: |
| 6174 |
previous = code; |
previous = code; |
| 6175 |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARNC : OP_CHAR; |
*code++ = ((options & PCRE_CASELESS) != 0)? OP_CHARI : OP_CHAR; |
| 6176 |
for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; |
for (c = 0; c < mclength; c++) *code++ = mcbuffer[c]; |
| 6177 |
|
|
| 6178 |
/* Remember if \r or \n were seen */ |
/* Remember if \r or \n were seen */ |
| 6236 |
/* On entry, ptr is pointing past the bracket character, but on return it |
/* On entry, ptr is pointing past the bracket character, but on return it |
| 6237 |
points to the closing bracket, or vertical bar, or end of string. The code |
points to the closing bracket, or vertical bar, or end of string. The code |
| 6238 |
variable is pointing at the byte into which the BRA operator has been stored. |
variable is pointing at the byte into which the BRA operator has been stored. |
|
If the ims options are changed at the start (for a (?ims: group) or during any |
|
|
branch, we need to insert an OP_OPT item at the start of every following branch |
|
|
to ensure they get set correctly at run time, and also pass the new options |
|
|
into every subsequent branch compile. |
|
|
|
|
| 6239 |
This function is used during the pre-compile phase when we are trying to find |
This function is used during the pre-compile phase when we are trying to find |
| 6240 |
out the amount of memory needed, as well as during the real compile phase. The |
out the amount of memory needed, as well as during the real compile phase. The |
| 6241 |
value of lengthptr distinguishes the two phases. |
value of lengthptr distinguishes the two phases. |
| 6327 |
|
|
| 6328 |
if (reset_bracount) cd->bracount = orig_bracount; |
if (reset_bracount) cd->bracount = orig_bracount; |
| 6329 |
|
|
|
/* Handle a change of ims options at the start of the branch */ |
|
|
|
|
|
if ((options & PCRE_IMS) != oldims) |
|
|
{ |
|
|
*code++ = OP_OPT; |
|
|
*code++ = options & PCRE_IMS; |
|
|
length += 2; |
|
|
} |
|
|
|
|
| 6330 |
/* Set up dummy OP_REVERSE if lookbehind assertion */ |
/* Set up dummy OP_REVERSE if lookbehind assertion */ |
| 6331 |
|
|
| 6332 |
if (lookbehind) |
if (lookbehind) |
| 6483 |
cd->open_caps = cd->open_caps->next; |
cd->open_caps = cd->open_caps->next; |
| 6484 |
} |
} |
| 6485 |
|
|
|
/* Reset options if needed. */ |
|
|
|
|
|
if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS) |
|
|
{ |
|
|
*code++ = OP_OPT; |
|
|
*code++ = oldims; |
|
|
length += 2; |
|
|
} |
|
|
|
|
| 6486 |
/* Retain the highest bracket number, in case resetting was used. */ |
/* Retain the highest bracket number, in case resetting was used. */ |
| 6487 |
|
|
| 6488 |
cd->bracount = max_bracount; |
cd->bracount = max_bracount; |
| 6542 |
/* Try to find out if this is an anchored regular expression. Consider each |
/* Try to find out if this is an anchored regular expression. Consider each |
| 6543 |
alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket |
alternative branch. If they all start with OP_SOD or OP_CIRC, or with a bracket |
| 6544 |
all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then |
all of whose alternatives start with OP_SOD or OP_CIRC (recurse ad lib), then |
| 6545 |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
it's anchored. However, if this is a multiline pattern, then only OP_SOD will |
| 6546 |
counts, since OP_CIRC can match in the middle. |
be found, because ^ generates OP_CIRCM in that mode. |
| 6547 |
|
|
| 6548 |
We can also consider a regex to be anchored if OP_SOM starts all its branches. |
We can also consider a regex to be anchored if OP_SOM starts all its branches. |
| 6549 |
This is the code for \G, which means "match at start of match position, taking |
This is the code for \G, which means "match at start of match position, taking |
| 6617 |
|
|
| 6618 |
/* Check for explicit anchoring */ |
/* Check for explicit anchoring */ |
| 6619 |
|
|
| 6620 |
else if (op != OP_SOD && op != OP_SOM && |
else if (op != OP_SOD && op != OP_SOM && op != OP_CIRC) return FALSE; |
|
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
|
|
return FALSE; |
|
| 6621 |
code += GET(code, 1); |
code += GET(code, 1); |
| 6622 |
} |
} |
| 6623 |
while (*code == OP_ALT); /* Loop for each alternative */ |
while (*code == OP_ALT); /* Loop for each alternative */ |
| 6717 |
|
|
| 6718 |
/* Check for explicit circumflex */ |
/* Check for explicit circumflex */ |
| 6719 |
|
|
| 6720 |
else if (op != OP_CIRC) return FALSE; |
else if (op != OP_CIRC && op != OP_CIRCM) return FALSE; |
| 6721 |
|
|
| 6722 |
/* Move on to the next alternative */ |
/* Move on to the next alternative */ |
| 6723 |
|
|
| 6778 |
scode += 2; |
scode += 2; |
| 6779 |
|
|
| 6780 |
case OP_CHAR: |
case OP_CHAR: |
| 6781 |
case OP_CHARNC: |
case OP_CHARI: |
| 6782 |
case OP_PLUS: |
case OP_PLUS: |
| 6783 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 6784 |
case OP_POSPLUS: |
case OP_POSPLUS: |