| 132 |
* Match a back-reference * |
* Match a back-reference * |
| 133 |
*************************************************/ |
*************************************************/ |
| 134 |
|
|
| 135 |
/* If a back reference hasn't been set, the length that is passed is greater |
/* Normally, if a back reference hasn't been set, the length that is passed is |
| 136 |
than the number of characters left in the string, so the match fails. |
negative, so the match always fails. However, in JavaScript compatibility mode, |
| 137 |
|
the length passed is zero. Note that in caseless UTF-8 mode, the number of |
| 138 |
|
subject bytes matched may be different to the number of reference bytes. |
| 139 |
|
|
| 140 |
Arguments: |
Arguments: |
| 141 |
offset index into the offset vector |
offset index into the offset vector |
| 142 |
eptr points into the subject |
eptr pointer into the subject |
| 143 |
length length to be matched |
length length of reference to be matched (number of bytes) |
| 144 |
md points to match data block |
md points to match data block |
| 145 |
ims the ims flags |
ims the ims flags |
| 146 |
|
|
| 147 |
Returns: TRUE if matched |
Returns: < 0 if not matched, otherwise the number of subject bytes matched |
| 148 |
*/ |
*/ |
| 149 |
|
|
| 150 |
static BOOL |
static int |
| 151 |
match_ref(int offset, register USPTR eptr, int length, match_data *md, |
match_ref(int offset, register USPTR eptr, int length, match_data *md, |
| 152 |
unsigned long int ims) |
unsigned long int ims) |
| 153 |
{ |
{ |
| 154 |
USPTR p = md->start_subject + md->offset_vector[offset]; |
USPTR eptr_start = eptr; |
| 155 |
|
register USPTR p = md->start_subject + md->offset_vector[offset]; |
| 156 |
|
|
| 157 |
#ifdef PCRE_DEBUG |
#ifdef PCRE_DEBUG |
| 158 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 167 |
printf("\n"); |
printf("\n"); |
| 168 |
#endif |
#endif |
| 169 |
|
|
| 170 |
/* Always fail if not enough characters left */ |
/* Always fail if reference not set (and not JavaScript compatible). */ |
| 171 |
|
|
| 172 |
if (length > md->end_subject - eptr) return FALSE; |
if (length < 0) return -1; |
| 173 |
|
|
| 174 |
/* Separate the caseless case for speed. In UTF-8 mode we can only do this |
/* Separate the caseless case for speed. In UTF-8 mode we can only do this |
| 175 |
properly if Unicode properties are supported. Otherwise, we can check only |
properly if Unicode properties are supported. Otherwise, we can check only |
| 181 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
| 182 |
if (md->utf8) |
if (md->utf8) |
| 183 |
{ |
{ |
| 184 |
USPTR endptr = eptr + length; |
/* Match characters up to the end of the reference. NOTE: the number of |
| 185 |
while (eptr < endptr) |
bytes matched may differ, because there are some characters whose upper and |
| 186 |
|
lower case versions code as different numbers of bytes. For example, U+023A |
| 187 |
|
(2 bytes in UTF-8) is the upper case version of U+2C65 (3 bytes in UTF-8); |
| 188 |
|
a sequence of 3 of the former uses 6 bytes, as does a sequence of two of |
| 189 |
|
the latter. It is important, therefore, to check the length along the |
| 190 |
|
reference, not along the subject (earlier code did this wrong). */ |
| 191 |
|
|
| 192 |
|
USPTR endptr = p + length; |
| 193 |
|
while (p < endptr) |
| 194 |
{ |
{ |
| 195 |
int c, d; |
int c, d; |
| 196 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
| 197 |
GETCHARINC(d, p); |
GETCHARINC(d, p); |
| 198 |
if (c != d && c != UCD_OTHERCASE(d)) return FALSE; |
if (c != d && c != UCD_OTHERCASE(d)) return -1; |
| 199 |
} |
} |
| 200 |
} |
} |
| 201 |
else |
else |
| 206 |
is no UCP support. */ |
is no UCP support. */ |
| 207 |
|
|
| 208 |
while (length-- > 0) |
while (length-- > 0) |
| 209 |
{ if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; } |
{ if (md->lcc[*p++] != md->lcc[*eptr++]) return -1; } |
| 210 |
} |
} |
| 211 |
|
|
| 212 |
/* In the caseful case, we can just compare the bytes, whether or not we |
/* In the caseful case, we can just compare the bytes, whether or not we |
| 213 |
are in UTF-8 mode. */ |
are in UTF-8 mode. */ |
| 214 |
|
|
| 215 |
else |
else |
| 216 |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
{ while (length-- > 0) if (*p++ != *eptr++) return -1; } |
| 217 |
|
|
| 218 |
return TRUE; |
return eptr - eptr_start; |
| 219 |
} |
} |
| 220 |
|
|
| 221 |
|
|
| 305 |
|
|
| 306 |
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\ |
#define RMATCH(ra,rb,rc,rd,re,rf,rg,rw)\ |
| 307 |
{\ |
{\ |
| 308 |
heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\ |
heapframe *newframe = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe));\ |
| 309 |
if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ |
if (newframe == NULL) RRETURN(PCRE_ERROR_NOMEMORY);\ |
| 310 |
frame->Xwhere = rw; \ |
frame->Xwhere = rw; \ |
| 311 |
newframe->Xeptr = ra;\ |
newframe->Xeptr = ra;\ |
| 433 |
the subject. */ |
the subject. */ |
| 434 |
|
|
| 435 |
#define CHECK_PARTIAL()\ |
#define CHECK_PARTIAL()\ |
| 436 |
if (md->partial != 0 && eptr >= md->end_subject && eptr > mstart)\ |
if (md->partial != 0 && eptr >= md->end_subject && \ |
| 437 |
{\ |
eptr > md->start_used_ptr) \ |
| 438 |
md->hitend = TRUE;\ |
{ \ |
| 439 |
if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\ |
md->hitend = TRUE; \ |
| 440 |
|
if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \ |
| 441 |
} |
} |
| 442 |
|
|
| 443 |
#define SCHECK_PARTIAL()\ |
#define SCHECK_PARTIAL()\ |
| 444 |
if (md->partial != 0 && eptr > mstart)\ |
if (md->partial != 0 && eptr > md->start_used_ptr) \ |
| 445 |
{\ |
{ \ |
| 446 |
md->hitend = TRUE;\ |
md->hitend = TRUE; \ |
| 447 |
if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL);\ |
if (md->partial > 1) MRRETURN(PCRE_ERROR_PARTIAL); \ |
| 448 |
} |
} |
| 449 |
|
|
| 450 |
|
|
| 500 |
heap whenever RMATCH() does a "recursion". See the macro definitions above. */ |
heap whenever RMATCH() does a "recursion". See the macro definitions above. */ |
| 501 |
|
|
| 502 |
#ifdef NO_RECURSE |
#ifdef NO_RECURSE |
| 503 |
heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe)); |
heapframe *frame = (heapframe *)(pcre_stack_malloc)(sizeof(heapframe)); |
| 504 |
if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY); |
if (frame == NULL) RRETURN(PCRE_ERROR_NOMEMORY); |
| 505 |
frame->Xprevframe = NULL; /* Marks the top level */ |
frame->Xprevframe = NULL; /* Marks the top level */ |
| 506 |
|
|
| 722 |
case OP_FAIL: |
case OP_FAIL: |
| 723 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 724 |
|
|
| 725 |
|
/* COMMIT overrides PRUNE, SKIP, and THEN */ |
| 726 |
|
|
| 727 |
case OP_COMMIT: |
case OP_COMMIT: |
| 728 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
| 729 |
ims, eptrb, flags, RM52); |
ims, eptrb, flags, RM52); |
| 730 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && |
| 731 |
|
rrc != MATCH_SKIP && rrc != MATCH_SKIP_ARG && |
| 732 |
|
rrc != MATCH_THEN) |
| 733 |
|
RRETURN(rrc); |
| 734 |
MRRETURN(MATCH_COMMIT); |
MRRETURN(MATCH_COMMIT); |
| 735 |
|
|
| 736 |
|
/* PRUNE overrides THEN */ |
| 737 |
|
|
| 738 |
case OP_PRUNE: |
case OP_PRUNE: |
| 739 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
| 740 |
ims, eptrb, flags, RM51); |
ims, eptrb, flags, RM51); |
| 741 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
| 742 |
MRRETURN(MATCH_PRUNE); |
MRRETURN(MATCH_PRUNE); |
| 743 |
|
|
| 744 |
case OP_PRUNE_ARG: |
case OP_PRUNE_ARG: |
| 745 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, |
| 746 |
ims, eptrb, flags, RM56); |
ims, eptrb, flags, RM56); |
| 747 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
| 748 |
md->mark = ecode + 2; |
md->mark = ecode + 2; |
| 749 |
RRETURN(MATCH_PRUNE); |
RRETURN(MATCH_PRUNE); |
| 750 |
|
|
| 751 |
|
/* SKIP overrides PRUNE and THEN */ |
| 752 |
|
|
| 753 |
case OP_SKIP: |
case OP_SKIP: |
| 754 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
| 755 |
ims, eptrb, flags, RM53); |
ims, eptrb, flags, RM53); |
| 756 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
| 757 |
|
RRETURN(rrc); |
| 758 |
md->start_match_ptr = eptr; /* Pass back current position */ |
md->start_match_ptr = eptr; /* Pass back current position */ |
| 759 |
MRRETURN(MATCH_SKIP); |
MRRETURN(MATCH_SKIP); |
| 760 |
|
|
| 761 |
case OP_SKIP_ARG: |
case OP_SKIP_ARG: |
| 762 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, |
| 763 |
ims, eptrb, flags, RM57); |
ims, eptrb, flags, RM57); |
| 764 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && rrc != MATCH_PRUNE && rrc != MATCH_THEN) |
| 765 |
|
RRETURN(rrc); |
| 766 |
|
|
| 767 |
/* Pass back the current skip name by overloading md->start_match_ptr and |
/* Pass back the current skip name by overloading md->start_match_ptr and |
| 768 |
returning the special MATCH_SKIP_ARG return code. This will either be |
returning the special MATCH_SKIP_ARG return code. This will either be |
| 772 |
md->start_match_ptr = ecode + 2; |
md->start_match_ptr = ecode + 2; |
| 773 |
RRETURN(MATCH_SKIP_ARG); |
RRETURN(MATCH_SKIP_ARG); |
| 774 |
|
|
| 775 |
|
/* For THEN (and THEN_ARG) we pass back the address of the bracket or |
| 776 |
|
the alt that is at the start of the current branch. This makes it possible |
| 777 |
|
to skip back past alternatives that precede the THEN within the current |
| 778 |
|
branch. */ |
| 779 |
|
|
| 780 |
case OP_THEN: |
case OP_THEN: |
| 781 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
| 782 |
ims, eptrb, flags, RM54); |
ims, eptrb, flags, RM54); |
| 783 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 784 |
|
md->start_match_ptr = ecode - GET(ecode, 1); |
| 785 |
MRRETURN(MATCH_THEN); |
MRRETURN(MATCH_THEN); |
| 786 |
|
|
| 787 |
case OP_THEN_ARG: |
case OP_THEN_ARG: |
| 788 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE], |
| 789 |
ims, eptrb, flags, RM58); |
offset_top, md, ims, eptrb, flags, RM58); |
| 790 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 791 |
md->mark = ecode + 2; |
md->start_match_ptr = ecode - GET(ecode, 1); |
| 792 |
|
md->mark = ecode + LINK_SIZE + 2; |
| 793 |
RRETURN(MATCH_THEN); |
RRETURN(MATCH_THEN); |
| 794 |
|
|
| 795 |
/* Handle a capturing bracket. If there is space in the offset vector, save |
/* Handle a capturing bracket. If there is space in the offset vector, save |
| 834 |
{ |
{ |
| 835 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
| 836 |
ims, eptrb, flags, RM1); |
ims, eptrb, flags, RM1); |
| 837 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && |
| 838 |
|
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 839 |
|
RRETURN(rrc); |
| 840 |
md->capture_last = save_capture_last; |
md->capture_last = save_capture_last; |
| 841 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 842 |
} |
} |
| 897 |
|
|
| 898 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, ims, |
| 899 |
eptrb, flags, RM2); |
eptrb, flags, RM2); |
| 900 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && |
| 901 |
|
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 902 |
|
RRETURN(rrc); |
| 903 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 904 |
} |
} |
| 905 |
/* Control never reaches here. */ |
/* Control never reaches here. */ |
| 1100 |
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); |
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); |
| 1101 |
while (*ecode == OP_ALT) ecode += GET(ecode, 1); |
while (*ecode == OP_ALT) ecode += GET(ecode, 1); |
| 1102 |
} |
} |
| 1103 |
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) |
else if (rrc != MATCH_NOMATCH && |
| 1104 |
|
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 1105 |
{ |
{ |
| 1106 |
RRETURN(rrc); /* Need braces because of following else */ |
RRETURN(rrc); /* Need braces because of following else */ |
| 1107 |
} |
} |
| 1229 |
mstart = md->start_match_ptr; /* In case \K reset it */ |
mstart = md->start_match_ptr; /* In case \K reset it */ |
| 1230 |
break; |
break; |
| 1231 |
} |
} |
| 1232 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && |
| 1233 |
|
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 1234 |
|
RRETURN(rrc); |
| 1235 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 1236 |
} |
} |
| 1237 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 1265 |
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
| 1266 |
break; |
break; |
| 1267 |
} |
} |
| 1268 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && |
| 1269 |
|
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 1270 |
|
RRETURN(rrc); |
| 1271 |
ecode += GET(ecode,1); |
ecode += GET(ecode,1); |
| 1272 |
} |
} |
| 1273 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 1404 |
(pcre_free)(new_recursive.offset_save); |
(pcre_free)(new_recursive.offset_save); |
| 1405 |
MRRETURN(MATCH_MATCH); |
MRRETURN(MATCH_MATCH); |
| 1406 |
} |
} |
| 1407 |
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) |
else if (rrc != MATCH_NOMATCH && |
| 1408 |
|
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 1409 |
{ |
{ |
| 1410 |
DPRINTF(("Recursion gave error %d\n", rrc)); |
DPRINTF(("Recursion gave error %d\n", rrc)); |
| 1411 |
if (new_recursive.offset_save != stacksave) |
if (new_recursive.offset_save != stacksave) |
| 1448 |
mstart = md->start_match_ptr; |
mstart = md->start_match_ptr; |
| 1449 |
break; |
break; |
| 1450 |
} |
} |
| 1451 |
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH && |
| 1452 |
|
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 1453 |
|
RRETURN(rrc); |
| 1454 |
ecode += GET(ecode,1); |
ecode += GET(ecode,1); |
| 1455 |
} |
} |
| 1456 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 1716 |
if (eptr < md->end_subject) |
if (eptr < md->end_subject) |
| 1717 |
{ if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); } |
{ if (!IS_NEWLINE(eptr)) MRRETURN(MATCH_NOMATCH); } |
| 1718 |
else |
else |
| 1719 |
{ if (md->noteol) MRRETURN(MATCH_NOMATCH); } |
{ |
| 1720 |
|
if (md->noteol) MRRETURN(MATCH_NOMATCH); |
| 1721 |
|
SCHECK_PARTIAL(); |
| 1722 |
|
} |
| 1723 |
ecode++; |
ecode++; |
| 1724 |
break; |
break; |
| 1725 |
} |
} |
| 1726 |
else |
else /* Not multiline */ |
| 1727 |
{ |
{ |
| 1728 |
if (md->noteol) MRRETURN(MATCH_NOMATCH); |
if (md->noteol) MRRETURN(MATCH_NOMATCH); |
| 1729 |
if (!md->endonly) |
if (!md->endonly) goto ASSERT_NL_OR_EOS; |
|
{ |
|
|
if (eptr != md->end_subject && |
|
|
(!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) |
|
|
MRRETURN(MATCH_NOMATCH); |
|
|
ecode++; |
|
|
break; |
|
|
} |
|
| 1730 |
} |
} |
| 1731 |
|
|
| 1732 |
/* ... else fall through for endonly */ |
/* ... else fall through for endonly */ |
| 1733 |
|
|
| 1734 |
/* End of subject assertion (\z) */ |
/* End of subject assertion (\z) */ |
| 1735 |
|
|
| 1736 |
case OP_EOD: |
case OP_EOD: |
| 1737 |
if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH); |
if (eptr < md->end_subject) MRRETURN(MATCH_NOMATCH); |
| 1738 |
|
SCHECK_PARTIAL(); |
| 1739 |
ecode++; |
ecode++; |
| 1740 |
break; |
break; |
| 1741 |
|
|
| 1742 |
/* End of subject or ending \n assertion (\Z) */ |
/* End of subject or ending \n assertion (\Z) */ |
| 1743 |
|
|
| 1744 |
case OP_EODN: |
case OP_EODN: |
| 1745 |
if (eptr != md->end_subject && |
ASSERT_NL_OR_EOS: |
| 1746 |
|
if (eptr < md->end_subject && |
| 1747 |
(!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) |
(!IS_NEWLINE(eptr) || eptr != md->end_subject - md->nllen)) |
| 1748 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 1749 |
|
|
| 1750 |
|
/* Either at end of string or \n before end. */ |
| 1751 |
|
|
| 1752 |
|
SCHECK_PARTIAL(); |
| 1753 |
ecode++; |
ecode++; |
| 1754 |
break; |
break; |
| 1755 |
|
|
| 2263 |
loops). */ |
loops). */ |
| 2264 |
|
|
| 2265 |
case OP_REF: |
case OP_REF: |
| 2266 |
{ |
offset = GET2(ecode, 1) << 1; /* Doubled ref number */ |
| 2267 |
offset = GET2(ecode, 1) << 1; /* Doubled ref number */ |
ecode += 3; |
|
ecode += 3; |
|
| 2268 |
|
|
| 2269 |
/* If the reference is unset, there are two possibilities: |
/* If the reference is unset, there are two possibilities: |
| 2270 |
|
|
| 2271 |
(a) In the default, Perl-compatible state, set the length to be longer |
(a) In the default, Perl-compatible state, set the length negative; |
| 2272 |
than the amount of subject left; this ensures that every attempt at a |
this ensures that every attempt at a match fails. We can't just fail |
| 2273 |
match fails. We can't just fail here, because of the possibility of |
here, because of the possibility of quantifiers with zero minima. |
|
quantifiers with zero minima. |
|
| 2274 |
|
|
| 2275 |
(b) If the JavaScript compatibility flag is set, set the length to zero |
(b) If the JavaScript compatibility flag is set, set the length to zero |
| 2276 |
so that the back reference matches an empty string. |
so that the back reference matches an empty string. |
| 2277 |
|
|
| 2278 |
Otherwise, set the length to the length of what was matched by the |
Otherwise, set the length to the length of what was matched by the |
| 2279 |
referenced subpattern. */ |
referenced subpattern. */ |
| 2280 |
|
|
| 2281 |
if (offset >= offset_top || md->offset_vector[offset] < 0) |
if (offset >= offset_top || md->offset_vector[offset] < 0) |
| 2282 |
length = (md->jscript_compat)? 0 : (int)(md->end_subject - eptr + 1); |
length = (md->jscript_compat)? 0 : -1; |
| 2283 |
else |
else |
| 2284 |
length = md->offset_vector[offset+1] - md->offset_vector[offset]; |
length = md->offset_vector[offset+1] - md->offset_vector[offset]; |
| 2285 |
|
|
| 2286 |
/* Set up for repetition, or handle the non-repeated case */ |
/* Set up for repetition, or handle the non-repeated case */ |
| 2287 |
|
|
| 2288 |
switch (*ecode) |
switch (*ecode) |
| 2289 |
{ |
{ |
| 2290 |
case OP_CRSTAR: |
case OP_CRSTAR: |
| 2291 |
case OP_CRMINSTAR: |
case OP_CRMINSTAR: |
| 2292 |
case OP_CRPLUS: |
case OP_CRPLUS: |
| 2293 |
case OP_CRMINPLUS: |
case OP_CRMINPLUS: |
| 2294 |
case OP_CRQUERY: |
case OP_CRQUERY: |
| 2295 |
case OP_CRMINQUERY: |
case OP_CRMINQUERY: |
| 2296 |
c = *ecode++ - OP_CRSTAR; |
c = *ecode++ - OP_CRSTAR; |
| 2297 |
minimize = (c & 1) != 0; |
minimize = (c & 1) != 0; |
| 2298 |
min = rep_min[c]; /* Pick up values from tables; */ |
min = rep_min[c]; /* Pick up values from tables; */ |
| 2299 |
max = rep_max[c]; /* zero for max => infinity */ |
max = rep_max[c]; /* zero for max => infinity */ |
| 2300 |
if (max == 0) max = INT_MAX; |
if (max == 0) max = INT_MAX; |
| 2301 |
break; |
break; |
| 2302 |
|
|
| 2303 |
case OP_CRRANGE: |
case OP_CRRANGE: |
| 2304 |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
| 2305 |
minimize = (*ecode == OP_CRMINRANGE); |
minimize = (*ecode == OP_CRMINRANGE); |
| 2306 |
min = GET2(ecode, 1); |
min = GET2(ecode, 1); |
| 2307 |
max = GET2(ecode, 3); |
max = GET2(ecode, 3); |
| 2308 |
if (max == 0) max = INT_MAX; |
if (max == 0) max = INT_MAX; |
| 2309 |
ecode += 5; |
ecode += 5; |
| 2310 |
break; |
break; |
| 2311 |
|
|
| 2312 |
default: /* No repeat follows */ |
default: /* No repeat follows */ |
| 2313 |
if (!match_ref(offset, eptr, length, md, ims)) |
if ((length = match_ref(offset, eptr, length, md, ims)) < 0) |
| 2314 |
{ |
{ |
| 2315 |
CHECK_PARTIAL(); |
CHECK_PARTIAL(); |
| 2316 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
|
} |
|
|
eptr += length; |
|
|
continue; /* With the main loop */ |
|
| 2317 |
} |
} |
| 2318 |
|
eptr += length; |
| 2319 |
|
continue; /* With the main loop */ |
| 2320 |
|
} |
| 2321 |
|
|
| 2322 |
/* If the length of the reference is zero, just continue with the |
/* Handle repeated back references. If the length of the reference is |
| 2323 |
main loop. */ |
zero, just continue with the main loop. */ |
| 2324 |
|
|
| 2325 |
if (length == 0) continue; |
if (length == 0) continue; |
| 2326 |
|
|
| 2327 |
/* First, ensure the minimum number of matches are present. We get back |
/* First, ensure the minimum number of matches are present. We get back |
| 2328 |
the length of the reference string explicitly rather than passing the |
the length of the reference string explicitly rather than passing the |
| 2329 |
address of eptr, so that eptr can be a register variable. */ |
address of eptr, so that eptr can be a register variable. */ |
| 2330 |
|
|
| 2331 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 2332 |
|
{ |
| 2333 |
|
int slength; |
| 2334 |
|
if ((slength = match_ref(offset, eptr, length, md, ims)) < 0) |
| 2335 |
{ |
{ |
| 2336 |
if (!match_ref(offset, eptr, length, md, ims)) |
CHECK_PARTIAL(); |
| 2337 |
{ |
MRRETURN(MATCH_NOMATCH); |
|
CHECK_PARTIAL(); |
|
|
MRRETURN(MATCH_NOMATCH); |
|
|
} |
|
|
eptr += length; |
|
| 2338 |
} |
} |
| 2339 |
|
eptr += slength; |
| 2340 |
|
} |
| 2341 |
|
|
| 2342 |
/* If min = max, continue at the same level without recursion. |
/* If min = max, continue at the same level without recursion. |
| 2343 |
They are not both allowed to be zero. */ |
They are not both allowed to be zero. */ |
| 2344 |
|
|
| 2345 |
if (min == max) continue; |
if (min == max) continue; |
| 2346 |
|
|
| 2347 |
/* If minimizing, keep trying and advancing the pointer */ |
/* If minimizing, keep trying and advancing the pointer */ |
| 2348 |
|
|
| 2349 |
if (minimize) |
if (minimize) |
| 2350 |
|
{ |
| 2351 |
|
for (fi = min;; fi++) |
| 2352 |
{ |
{ |
| 2353 |
for (fi = min;; fi++) |
int slength; |
| 2354 |
|
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14); |
| 2355 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 2356 |
|
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
| 2357 |
|
if ((slength = match_ref(offset, eptr, length, md, ims)) < 0) |
| 2358 |
{ |
{ |
| 2359 |
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM14); |
CHECK_PARTIAL(); |
| 2360 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
MRRETURN(MATCH_NOMATCH); |
|
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
|
|
if (!match_ref(offset, eptr, length, md, ims)) |
|
|
{ |
|
|
CHECK_PARTIAL(); |
|
|
MRRETURN(MATCH_NOMATCH); |
|
|
} |
|
|
eptr += length; |
|
| 2361 |
} |
} |
| 2362 |
/* Control never gets here */ |
eptr += slength; |
| 2363 |
} |
} |
| 2364 |
|
/* Control never gets here */ |
| 2365 |
|
} |
| 2366 |
|
|
| 2367 |
/* If maximizing, find the longest string and work backwards */ |
/* If maximizing, find the longest string and work backwards */ |
| 2368 |
|
|
| 2369 |
else |
else |
| 2370 |
|
{ |
| 2371 |
|
pp = eptr; |
| 2372 |
|
for (i = min; i < max; i++) |
| 2373 |
{ |
{ |
| 2374 |
pp = eptr; |
int slength; |
| 2375 |
for (i = min; i < max; i++) |
if ((slength = match_ref(offset, eptr, length, md, ims)) < 0) |
|
{ |
|
|
if (!match_ref(offset, eptr, length, md, ims)) |
|
|
{ |
|
|
CHECK_PARTIAL(); |
|
|
break; |
|
|
} |
|
|
eptr += length; |
|
|
} |
|
|
while (eptr >= pp) |
|
| 2376 |
{ |
{ |
| 2377 |
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15); |
CHECK_PARTIAL(); |
| 2378 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
break; |
|
eptr -= length; |
|
| 2379 |
} |
} |
| 2380 |
MRRETURN(MATCH_NOMATCH); |
eptr += slength; |
| 2381 |
|
} |
| 2382 |
|
while (eptr >= pp) |
| 2383 |
|
{ |
| 2384 |
|
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM15); |
| 2385 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 2386 |
|
eptr -= length; |
| 2387 |
} |
} |
| 2388 |
|
MRRETURN(MATCH_NOMATCH); |
| 2389 |
} |
} |
| 2390 |
/* Control never gets here */ |
/* Control never gets here */ |
| 2391 |
|
|
| 5645 |
if (re == NULL || subject == NULL || |
if (re == NULL || subject == NULL || |
| 5646 |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
| 5647 |
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; |
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; |
| 5648 |
|
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; |
| 5649 |
|
|
| 5650 |
/* This information is for finding all the numbers associated with a given |
/* This information is for finding all the numbers associated with a given |
| 5651 |
name, for condition testing. */ |
name, for condition testing. */ |
| 5812 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 5813 |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
| 5814 |
{ |
{ |
| 5815 |
if (_pcre_valid_utf8((USPTR)subject, length) >= 0) |
int tb; |
| 5816 |
return PCRE_ERROR_BADUTF8; |
if ((tb = _pcre_valid_utf8((USPTR)subject, length)) >= 0) |
| 5817 |
|
return (tb == length && md->partial > 1)? |
| 5818 |
|
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
| 5819 |
if (start_offset > 0 && start_offset < length) |
if (start_offset > 0 && start_offset < length) |
| 5820 |
{ |
{ |
| 5821 |
int tb = ((USPTR)subject)[start_offset]; |
tb = ((USPTR)subject)[start_offset] & 0xc0; |
| 5822 |
if (tb > 127) |
if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET; |
|
{ |
|
|
tb &= 0xc0; |
|
|
if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET; |
|
|
} |
|
| 5823 |
} |
} |
| 5824 |
} |
} |
| 5825 |
#endif |
#endif |
| 5947 |
/* There are some optimizations that avoid running the match if a known |
/* There are some optimizations that avoid running the match if a known |
| 5948 |
starting point is not found, or if a known later character is not present. |
starting point is not found, or if a known later character is not present. |
| 5949 |
However, there is an option that disables these, for testing and for ensuring |
However, there is an option that disables these, for testing and for ensuring |
| 5950 |
that all callouts do actually occur. */ |
that all callouts do actually occur. The option can be set in the regex by |
| 5951 |
|
(*NO_START_OPT) or passed in match-time options. */ |
| 5952 |
|
|
| 5953 |
if ((options & PCRE_NO_START_OPTIMIZE) == 0) |
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) |
| 5954 |
{ |
{ |
| 5955 |
/* Advance to a unique first byte if there is one. */ |
/* Advance to a unique first byte if there is one. */ |
| 5956 |
|
|