| 1718 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 1719 |
if (utf8) |
if (utf8) |
| 1720 |
{ |
{ |
| 1721 |
|
/* Get status of previous character */ |
| 1722 |
|
|
| 1723 |
if (eptr == md->start_subject) prev_is_word = FALSE; else |
if (eptr == md->start_subject) prev_is_word = FALSE; else |
| 1724 |
{ |
{ |
| 1725 |
USPTR lastptr = eptr - 1; |
USPTR lastptr = eptr - 1; |
| 1726 |
while((*lastptr & 0xc0) == 0x80) lastptr--; |
while((*lastptr & 0xc0) == 0x80) lastptr--; |
| 1727 |
if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; |
if (lastptr < md->start_used_ptr) md->start_used_ptr = lastptr; |
| 1728 |
GETCHAR(c, lastptr); |
GETCHAR(c, lastptr); |
| 1729 |
|
#ifdef SUPPORT_UCP |
| 1730 |
|
if (md->use_ucp) |
| 1731 |
|
{ |
| 1732 |
|
if (c == '_') prev_is_word = TRUE; else |
| 1733 |
|
{ |
| 1734 |
|
int cat = UCD_CATEGORY(c); |
| 1735 |
|
prev_is_word = (cat == ucp_L || cat == ucp_N); |
| 1736 |
|
} |
| 1737 |
|
} |
| 1738 |
|
else |
| 1739 |
|
#endif |
| 1740 |
prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; |
prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; |
| 1741 |
} |
} |
| 1742 |
|
|
| 1743 |
|
/* Get status of next character */ |
| 1744 |
|
|
| 1745 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 1746 |
{ |
{ |
| 1747 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
| 1750 |
else |
else |
| 1751 |
{ |
{ |
| 1752 |
GETCHAR(c, eptr); |
GETCHAR(c, eptr); |
| 1753 |
|
#ifdef SUPPORT_UCP |
| 1754 |
|
if (md->use_ucp) |
| 1755 |
|
{ |
| 1756 |
|
if (c == '_') cur_is_word = TRUE; else |
| 1757 |
|
{ |
| 1758 |
|
int cat = UCD_CATEGORY(c); |
| 1759 |
|
cur_is_word = (cat == ucp_L || cat == ucp_N); |
| 1760 |
|
} |
| 1761 |
|
} |
| 1762 |
|
else |
| 1763 |
|
#endif |
| 1764 |
cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; |
cur_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; |
| 1765 |
} |
} |
| 1766 |
} |
} |
| 1767 |
else |
else |
| 1768 |
#endif |
#endif |
| 1769 |
|
|
| 1770 |
/* Not in UTF-8 mode */ |
/* Not in UTF-8 mode, but we may still have PCRE_UCP set, and for |
| 1771 |
|
consistency with the behaviour of \w we do use it in this case. */ |
| 1772 |
|
|
| 1773 |
{ |
{ |
| 1774 |
|
/* Get status of previous character */ |
| 1775 |
|
|
| 1776 |
if (eptr == md->start_subject) prev_is_word = FALSE; else |
if (eptr == md->start_subject) prev_is_word = FALSE; else |
| 1777 |
{ |
{ |
| 1778 |
if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1; |
if (eptr <= md->start_used_ptr) md->start_used_ptr = eptr - 1; |
| 1779 |
|
#ifdef SUPPORT_UCP |
| 1780 |
|
if (md->use_ucp) |
| 1781 |
|
{ |
| 1782 |
|
c = eptr[-1]; |
| 1783 |
|
if (c == '_') prev_is_word = TRUE; else |
| 1784 |
|
{ |
| 1785 |
|
int cat = UCD_CATEGORY(c); |
| 1786 |
|
prev_is_word = (cat == ucp_L || cat == ucp_N); |
| 1787 |
|
} |
| 1788 |
|
} |
| 1789 |
|
else |
| 1790 |
|
#endif |
| 1791 |
prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); |
prev_is_word = ((md->ctypes[eptr[-1]] & ctype_word) != 0); |
| 1792 |
} |
} |
| 1793 |
|
|
| 1794 |
|
/* Get status of next character */ |
| 1795 |
|
|
| 1796 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 1797 |
{ |
{ |
| 1798 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
| 1799 |
cur_is_word = FALSE; |
cur_is_word = FALSE; |
| 1800 |
} |
} |
| 1801 |
else cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); |
else |
| 1802 |
|
#ifdef SUPPORT_UCP |
| 1803 |
|
if (md->use_ucp) |
| 1804 |
|
{ |
| 1805 |
|
c = *eptr; |
| 1806 |
|
if (c == '_') cur_is_word = TRUE; else |
| 1807 |
|
{ |
| 1808 |
|
int cat = UCD_CATEGORY(c); |
| 1809 |
|
cur_is_word = (cat == ucp_L || cat == ucp_N); |
| 1810 |
|
} |
| 1811 |
|
} |
| 1812 |
|
else |
| 1813 |
|
#endif |
| 1814 |
|
cur_is_word = ((md->ctypes[*eptr] & ctype_word) != 0); |
| 1815 |
} |
} |
| 1816 |
|
|
| 1817 |
/* Now see if the situation is what we want */ |
/* Now see if the situation is what we want */ |
| 5665 |
|
|
| 5666 |
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
| 5667 |
utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0; |
utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0; |
| 5668 |
|
md->use_ucp = (re->options & PCRE_UCP) != 0; |
| 5669 |
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
| 5670 |
|
|
| 5671 |
md->notbol = (options & PCRE_NOTBOL) != 0; |
md->notbol = (options & PCRE_NOTBOL) != 0; |