| 76 |
#define MATCH_ACCEPT (-999) |
#define MATCH_ACCEPT (-999) |
| 77 |
#define MATCH_COMMIT (-998) |
#define MATCH_COMMIT (-998) |
| 78 |
#define MATCH_KETRPOS (-997) |
#define MATCH_KETRPOS (-997) |
| 79 |
#define MATCH_PRUNE (-996) |
#define MATCH_ONCE (-996) |
| 80 |
#define MATCH_SKIP (-995) |
#define MATCH_PRUNE (-995) |
| 81 |
#define MATCH_SKIP_ARG (-994) |
#define MATCH_SKIP (-994) |
| 82 |
#define MATCH_THEN (-993) |
#define MATCH_SKIP_ARG (-993) |
| 83 |
|
#define MATCH_THEN (-992) |
| 84 |
|
|
| 85 |
/* This is a convenience macro for code that occurs many times. */ |
/* This is a convenience macro for code that occurs many times. */ |
| 86 |
|
|
| 277 |
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, |
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, |
| 278 |
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, |
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, |
| 279 |
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, |
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, |
| 280 |
RM61, RM62, RM63}; |
RM61, RM62, RM63 }; |
| 281 |
|
|
| 282 |
/* These versions of the macros use the stack, as normal. There are debugging |
/* These versions of the macros use the stack, as normal. There are debugging |
| 283 |
versions and production versions. Note that the "rw" argument of RMATCH isn't |
versions and production versions. Note that the "rw" argument of RMATCH isn't |
| 384 |
int Xprop_type; |
int Xprop_type; |
| 385 |
int Xprop_value; |
int Xprop_value; |
| 386 |
int Xprop_fail_result; |
int Xprop_fail_result; |
|
int Xprop_category; |
|
|
int Xprop_chartype; |
|
|
int Xprop_script; |
|
| 387 |
int Xoclength; |
int Xoclength; |
| 388 |
uschar Xocchars[8]; |
uschar Xocchars[8]; |
| 389 |
#endif |
#endif |
| 548 |
#define prop_type frame->Xprop_type |
#define prop_type frame->Xprop_type |
| 549 |
#define prop_value frame->Xprop_value |
#define prop_value frame->Xprop_value |
| 550 |
#define prop_fail_result frame->Xprop_fail_result |
#define prop_fail_result frame->Xprop_fail_result |
|
#define prop_category frame->Xprop_category |
|
|
#define prop_chartype frame->Xprop_chartype |
|
|
#define prop_script frame->Xprop_script |
|
| 551 |
#define oclength frame->Xoclength |
#define oclength frame->Xoclength |
| 552 |
#define occhars frame->Xocchars |
#define occhars frame->Xocchars |
| 553 |
#endif |
#endif |
| 605 |
int prop_type; |
int prop_type; |
| 606 |
int prop_value; |
int prop_value; |
| 607 |
int prop_fail_result; |
int prop_fail_result; |
|
int prop_category; |
|
|
int prop_chartype; |
|
|
int prop_script; |
|
| 608 |
int oclength; |
int oclength; |
| 609 |
uschar occhars[8]; |
uschar occhars[8]; |
| 610 |
#endif |
#endif |
| 837 |
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
| 838 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
| 839 |
eptrb, RM1); |
eptrb, RM1); |
| 840 |
|
if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ |
| 841 |
if (rrc != MATCH_NOMATCH && |
if (rrc != MATCH_NOMATCH && |
| 842 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 843 |
RRETURN(rrc); |
RRETURN(rrc); |
| 847 |
} |
} |
| 848 |
|
|
| 849 |
DPRINTF(("bracket %d failed\n", number)); |
DPRINTF(("bracket %d failed\n", number)); |
|
|
|
| 850 |
md->offset_vector[offset] = save_offset1; |
md->offset_vector[offset] = save_offset1; |
| 851 |
md->offset_vector[offset+1] = save_offset2; |
md->offset_vector[offset+1] = save_offset2; |
| 852 |
md->offset_vector[md->offset_end - number] = save_offset3; |
md->offset_vector[md->offset_end - number] = save_offset3; |
| 853 |
|
|
| 854 |
|
/* At this point, rrc will be one of MATCH_ONCE, MATCH_NOMATCH, or |
| 855 |
|
MATCH_THEN. */ |
| 856 |
|
|
| 857 |
if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr; |
if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr; |
| 858 |
RRETURN(MATCH_NOMATCH); |
RRETURN(((rrc == MATCH_ONCE)? MATCH_ONCE:MATCH_NOMATCH)); |
| 859 |
} |
} |
| 860 |
|
|
| 861 |
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat |
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat |
| 869 |
/* VVVVVVVVVVVVVVVVVVVVVVVVV */ |
/* VVVVVVVVVVVVVVVVVVVVVVVVV */ |
| 870 |
/* VVVVVVVVVVVVVVVVVVVVVVVVV */ |
/* VVVVVVVVVVVVVVVVVVVVVVVVV */ |
| 871 |
|
|
| 872 |
/* Non-capturing bracket, except for possessive with unlimited repeat. Loop |
/* Non-capturing or atomic group, except for possessive with unlimited |
| 873 |
for all the alternatives. When we get to the final alternative within the |
repeat. Loop for all the alternatives. When we get to the final alternative |
| 874 |
brackets, we used to return the result of a recursive call to match() |
within the brackets, we used to return the result of a recursive call to |
| 875 |
whatever happened so it was possible to reduce stack usage by turning this |
match() whatever happened so it was possible to reduce stack usage by |
| 876 |
into a tail recursion, except in the case of a possibly empty group. |
turning this into a tail recursion, except in the case of a possibly empty |
| 877 |
However, now that there is the possibility of (*THEN) occurring in the final |
group. However, now that there is the possibility of (*THEN) occurring in |
| 878 |
alternative, this optimization is no longer possible. */ |
the final alternative, this optimization is no longer possible. |
| 879 |
|
|
| 880 |
|
MATCH_ONCE is returned when the end of an atomic group is successfully |
| 881 |
|
reached, but subsequent matching fails. It passes back up the tree (causing |
| 882 |
|
captured values to be reset) until the original atomic group level is |
| 883 |
|
reached. This is tested by comparing md->once_target with the start of the |
| 884 |
|
group. At this point, the return is converted into MATCH_NOMATCH so that |
| 885 |
|
previous backup points can be taken. */ |
| 886 |
|
|
| 887 |
|
case OP_ONCE: |
| 888 |
case OP_BRA: |
case OP_BRA: |
| 889 |
case OP_SBRA: |
case OP_SBRA: |
| 890 |
DPRINTF(("start non-capturing bracket\n")); |
DPRINTF(("start non-capturing bracket\n")); |
| 891 |
|
|
| 892 |
for (;;) |
for (;;) |
| 893 |
{ |
{ |
| 894 |
if (op >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP; |
| 895 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb, |
| 896 |
RM2); |
RM2); |
| 897 |
if (rrc != MATCH_NOMATCH && |
if (rrc != MATCH_NOMATCH && |
| 898 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 899 |
|
{ |
| 900 |
|
if (rrc == MATCH_ONCE) |
| 901 |
|
{ |
| 902 |
|
const uschar *scode = ecode; |
| 903 |
|
if (*scode != OP_ONCE) /* If not at start, find it */ |
| 904 |
|
{ |
| 905 |
|
while (*scode == OP_ALT) scode += GET(scode, 1); |
| 906 |
|
scode -= GET(scode, 1); |
| 907 |
|
} |
| 908 |
|
if (md->once_target == scode) rrc = MATCH_NOMATCH; |
| 909 |
|
} |
| 910 |
RRETURN(rrc); |
RRETURN(rrc); |
| 911 |
|
} |
| 912 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 913 |
if (*ecode != OP_ALT) break; |
if (*ecode != OP_ALT) break; |
| 914 |
} |
} |
|
|
|
| 915 |
if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr; |
if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr; |
| 916 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
| 917 |
|
|
| 1244 |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3); |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, NULL, RM3); |
| 1245 |
if (rrc == MATCH_MATCH) |
if (rrc == MATCH_MATCH) |
| 1246 |
{ |
{ |
| 1247 |
|
if (md->end_offset_top > offset_top) |
| 1248 |
|
offset_top = md->end_offset_top; /* Captures may have happened */ |
| 1249 |
condition = TRUE; |
condition = TRUE; |
| 1250 |
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); |
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); |
| 1251 |
while (*ecode == OP_ALT) ecode += GET(ecode, 1); |
while (*ecode == OP_ALT) ecode += GET(ecode, 1); |
| 1309 |
break; |
break; |
| 1310 |
|
|
| 1311 |
|
|
| 1312 |
/* End of the pattern, either real or forced. If we are in a recursion, we |
/* End of the pattern, either real or forced. */ |
|
should restore the offsets appropriately, and if it's a top-level |
|
|
recursion, continue from after the call. */ |
|
| 1313 |
|
|
| 1314 |
|
case OP_END: |
| 1315 |
case OP_ACCEPT: |
case OP_ACCEPT: |
| 1316 |
case OP_ASSERT_ACCEPT: |
case OP_ASSERT_ACCEPT: |
| 1317 |
case OP_END: |
|
| 1318 |
if (md->recursive != NULL) |
/* If we have matched an empty string, fail if not in an assertion and not |
| 1319 |
{ |
in a recursion if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART |
|
recursion_info *rec = md->recursive; |
|
|
md->recursive = rec->prevrec; |
|
|
memmove(md->offset_vector, rec->offset_save, |
|
|
rec->saved_max * sizeof(int)); |
|
|
offset_top = rec->save_offset_top; |
|
|
if (rec->group_num == 0) |
|
|
{ |
|
|
ecode = rec->after_call; |
|
|
break; |
|
|
} |
|
|
} |
|
|
|
|
|
/* Otherwise, if we have matched an empty string, fail if not in an |
|
|
assertion and if either PCRE_NOTEMPTY is set, or if PCRE_NOTEMPTY_ATSTART |
|
| 1320 |
is set and we have matched at the start of the subject. In both cases, |
is set and we have matched at the start of the subject. In both cases, |
| 1321 |
backtracking will then try other alternatives, if any. */ |
backtracking will then try other alternatives, if any. */ |
| 1322 |
|
|
| 1323 |
else if (eptr == mstart && op != OP_ASSERT_ACCEPT && |
if (eptr == mstart && op != OP_ASSERT_ACCEPT && |
| 1324 |
(md->notempty || |
md->recursive == NULL && |
| 1325 |
(md->notempty_atstart && |
(md->notempty || |
| 1326 |
mstart == md->start_subject + md->start_offset))) |
(md->notempty_atstart && |
| 1327 |
|
mstart == md->start_subject + md->start_offset))) |
| 1328 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 1329 |
|
|
| 1330 |
/* Otherwise, we have a match. */ |
/* Otherwise, we have a match. */ |
| 1484 |
/* Recursion either matches the current regex, or some subexpression. The |
/* Recursion either matches the current regex, or some subexpression. The |
| 1485 |
offset data is the offset to the starting bracket from the start of the |
offset data is the offset to the starting bracket from the start of the |
| 1486 |
whole pattern. (This is so that it works from duplicated subpatterns.) |
whole pattern. (This is so that it works from duplicated subpatterns.) |
| 1487 |
|
|
| 1488 |
If there are any capturing brackets started but not finished, we have to |
The state of the capturing groups is preserved over recursion, and |
| 1489 |
save their starting points and reinstate them after the recursion. However, |
re-instated afterwards. We don't know how many are started and not yet |
| 1490 |
we don't know how many such there are (offset_top records the completed |
finished (offset_top records the completed total) so we just have to save |
| 1491 |
total) so we just have to save all the potential data. There may be up to |
all the potential data. There may be up to 65535 such values, which is too |
| 1492 |
65535 such values, which is too large to put on the stack, but using malloc |
large to put on the stack, but using malloc for small numbers seems |
| 1493 |
for small numbers seems expensive. As a compromise, the stack is used when |
expensive. As a compromise, the stack is used when there are no more than |
| 1494 |
there are no more than REC_STACK_SAVE_MAX values to store; otherwise malloc |
REC_STACK_SAVE_MAX values to store; otherwise malloc is used. |
|
is used. |
|
| 1495 |
|
|
| 1496 |
There are also other values that have to be saved. We use a chained |
There are also other values that have to be saved. We use a chained |
| 1497 |
sequence of blocks that actually live on the stack. Thanks to Robin Houston |
sequence of blocks that actually live on the stack. Thanks to Robin Houston |
| 1498 |
for the original version of this logic. */ |
for the original version of this logic. It has, however, been hacked around |
| 1499 |
|
a lot, so he is not to blame for the current way it works. */ |
| 1500 |
|
|
| 1501 |
case OP_RECURSE: |
case OP_RECURSE: |
| 1502 |
{ |
{ |
| 1509 |
new_recursive.prevrec = md->recursive; |
new_recursive.prevrec = md->recursive; |
| 1510 |
md->recursive = &new_recursive; |
md->recursive = &new_recursive; |
| 1511 |
|
|
| 1512 |
/* Find where to continue from afterwards */ |
/* Where to continue from afterwards */ |
| 1513 |
|
|
| 1514 |
ecode += 1 + LINK_SIZE; |
ecode += 1 + LINK_SIZE; |
|
new_recursive.after_call = ecode; |
|
| 1515 |
|
|
| 1516 |
/* Now save the offset data. */ |
/* Now save the offset data */ |
| 1517 |
|
|
| 1518 |
new_recursive.saved_max = md->offset_end; |
new_recursive.saved_max = md->offset_end; |
| 1519 |
if (new_recursive.saved_max <= REC_STACK_SAVE_MAX) |
if (new_recursive.saved_max <= REC_STACK_SAVE_MAX) |
| 1524 |
(int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); |
(int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); |
| 1525 |
if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); |
if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); |
| 1526 |
} |
} |
|
|
|
| 1527 |
memcpy(new_recursive.offset_save, md->offset_vector, |
memcpy(new_recursive.offset_save, md->offset_vector, |
| 1528 |
new_recursive.saved_max * sizeof(int)); |
new_recursive.saved_max * sizeof(int)); |
|
new_recursive.save_offset_top = offset_top; |
|
| 1529 |
|
|
| 1530 |
/* OK, now we can do the recursion. For each top-level alternative we |
/* OK, now we can do the recursion. After processing each alternative, |
| 1531 |
restore the offset and recursion data. */ |
restore the offset data. If there were nested recursions, md->recursive |
| 1532 |
|
might be changed, so reset it before looping. */ |
| 1533 |
|
|
| 1534 |
DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); |
DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); |
| 1535 |
cbegroup = (*callpat >= OP_SBRA); |
cbegroup = (*callpat >= OP_SBRA); |
| 1538 |
if (cbegroup) md->match_function_type = MATCH_CBEGROUP; |
if (cbegroup) md->match_function_type = MATCH_CBEGROUP; |
| 1539 |
RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top, |
RMATCH(eptr, callpat + _pcre_OP_lengths[*callpat], offset_top, |
| 1540 |
md, eptrb, RM6); |
md, eptrb, RM6); |
| 1541 |
|
memcpy(md->offset_vector, new_recursive.offset_save, |
| 1542 |
|
new_recursive.saved_max * sizeof(int)); |
| 1543 |
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
| 1544 |
{ |
{ |
| 1545 |
DPRINTF(("Recursion matched\n")); |
DPRINTF(("Recursion matched\n")); |
| 1546 |
md->recursive = new_recursive.prevrec; |
md->recursive = new_recursive.prevrec; |
| 1547 |
if (new_recursive.offset_save != stacksave) |
if (new_recursive.offset_save != stacksave) |
| 1548 |
(pcre_free)(new_recursive.offset_save); |
(pcre_free)(new_recursive.offset_save); |
| 1549 |
MRRETURN(MATCH_MATCH); |
|
| 1550 |
|
/* Set where we got to in the subject, and reset the start in case |
| 1551 |
|
it was changed by \K. This *is* propagated back out of a recursion, |
| 1552 |
|
for Perl compatibility. */ |
| 1553 |
|
|
| 1554 |
|
eptr = md->end_match_ptr; |
| 1555 |
|
mstart = md->start_match_ptr; |
| 1556 |
|
goto RECURSION_MATCHED; /* Exit loop; end processing */ |
| 1557 |
} |
} |
| 1558 |
else if (rrc != MATCH_NOMATCH && |
else if (rrc != MATCH_NOMATCH && |
| 1559 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
| 1565 |
} |
} |
| 1566 |
|
|
| 1567 |
md->recursive = &new_recursive; |
md->recursive = &new_recursive; |
|
memcpy(md->offset_vector, new_recursive.offset_save, |
|
|
new_recursive.saved_max * sizeof(int)); |
|
| 1568 |
callpat += GET(callpat, 1); |
callpat += GET(callpat, 1); |
| 1569 |
} |
} |
| 1570 |
while (*callpat == OP_ALT); |
while (*callpat == OP_ALT); |
| 1575 |
(pcre_free)(new_recursive.offset_save); |
(pcre_free)(new_recursive.offset_save); |
| 1576 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 1577 |
} |
} |
| 1578 |
/* Control never reaches here */ |
|
| 1579 |
|
RECURSION_MATCHED: |
| 1580 |
/* "Once" brackets are like assertion brackets except that after a match, |
break; |
|
the point in the subject string is not moved back. Thus there can never be |
|
|
a move back into the brackets. Friedl calls these "atomic" subpatterns. |
|
|
Check the alternative branches in turn - the matching won't pass the KET |
|
|
for this kind of subpattern. If any one branch matches, we carry on as at |
|
|
the end of a normal bracket, leaving the subject pointer, but resetting |
|
|
the start-of-match value in case it was changed by \K. */ |
|
|
|
|
|
case OP_ONCE: |
|
|
prev = ecode; |
|
|
saved_eptr = eptr; |
|
|
|
|
|
do |
|
|
{ |
|
|
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7); |
|
|
if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */ |
|
|
{ |
|
|
mstart = md->start_match_ptr; |
|
|
break; |
|
|
} |
|
|
if (rrc != MATCH_NOMATCH && |
|
|
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
|
|
RRETURN(rrc); |
|
|
ecode += GET(ecode,1); |
|
|
} |
|
|
while (*ecode == OP_ALT); |
|
|
|
|
|
/* If hit the end of the group (which could be repeated), fail */ |
|
|
|
|
|
if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH); |
|
|
|
|
|
/* Continue after the group, updating the offsets high water mark, since |
|
|
extracts may have been taken. */ |
|
|
|
|
|
do ecode += GET(ecode, 1); while (*ecode == OP_ALT); |
|
|
|
|
|
offset_top = md->end_offset_top; |
|
|
eptr = md->end_match_ptr; |
|
|
|
|
|
/* For a non-repeating ket, just continue at this level. This also |
|
|
happens for a repeating ket if no characters were matched in the group. |
|
|
This is the forcible breaking of infinite loops as implemented in Perl |
|
|
5.005. */ |
|
|
|
|
|
if (*ecode == OP_KET || eptr == saved_eptr) |
|
|
{ |
|
|
ecode += 1+LINK_SIZE; |
|
|
break; |
|
|
} |
|
|
|
|
|
/* The repeating kets try the rest of the pattern or restart from the |
|
|
preceding bracket, in the appropriate order. The second "call" of match() |
|
|
uses tail recursion, to avoid using another stack frame. */ |
|
|
|
|
|
if (*ecode == OP_KETRMIN) |
|
|
{ |
|
|
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM8); |
|
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
|
|
ecode = prev; |
|
|
} |
|
|
else /* OP_KETRMAX */ |
|
|
{ |
|
|
md->match_function_type = MATCH_CBEGROUP; |
|
|
RMATCH(eptr, prev, offset_top, md, eptrb, RM9); |
|
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
|
|
ecode += 1 + LINK_SIZE; |
|
|
} |
|
|
goto TAIL_RECURSE; |
|
|
|
|
|
/* Control never gets here */ |
|
| 1581 |
|
|
| 1582 |
/* An alternation is the end of a branch; scan along to find the end of the |
/* An alternation is the end of a branch; scan along to find the end of the |
| 1583 |
bracketed group and go to there. */ |
bracketed group and go to there. */ |
| 1630 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 1631 |
case OP_KETRPOS: |
case OP_KETRPOS: |
| 1632 |
prev = ecode - GET(ecode, 1); |
prev = ecode - GET(ecode, 1); |
| 1633 |
|
|
| 1634 |
/* If this was a group that remembered the subject start, in order to break |
/* If this was a group that remembered the subject start, in order to break |
| 1635 |
infinite repeats of empty string matches, retrieve the subject start from |
infinite repeats of empty string matches, retrieve the subject start from |
| 1636 |
the chain. Otherwise, set it NULL. */ |
the chain. Otherwise, set it NULL. */ |
| 1637 |
|
|
| 1638 |
if (*prev >= OP_SBRA) |
if (*prev >= OP_SBRA || *prev == OP_ONCE) |
| 1639 |
{ |
{ |
| 1640 |
saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */ |
saved_eptr = eptrb->epb_saved_eptr; /* Value at start of group */ |
| 1641 |
eptrb = eptrb->epb_prev; /* Backup to previous group */ |
eptrb = eptrb->epb_prev; /* Backup to previous group */ |
| 1642 |
} |
} |
| 1643 |
else saved_eptr = NULL; |
else saved_eptr = NULL; |
| 1644 |
|
|
| 1645 |
/* If we are at the end of an assertion group or an atomic group, stop |
/* If we are at the end of an assertion group, stop matching and return |
| 1646 |
matching and return MATCH_MATCH, but record the current high water mark for |
MATCH_MATCH, but record the current high water mark for use by positive |
| 1647 |
use by positive assertions. We also need to record the match start in case |
assertions. We also need to record the match start in case it was changed |
| 1648 |
it was changed by \K. */ |
by \K. */ |
| 1649 |
|
|
| 1650 |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || |
| 1651 |
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || |
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT) |
|
*prev == OP_ONCE) |
|
| 1652 |
{ |
{ |
| 1653 |
md->end_match_ptr = eptr; /* For ONCE */ |
md->end_match_ptr = eptr; /* For ONCE */ |
| 1654 |
md->end_offset_top = offset_top; |
md->end_offset_top = offset_top; |
| 1658 |
|
|
| 1659 |
/* For capturing groups we have to check the group number back at the start |
/* For capturing groups we have to check the group number back at the start |
| 1660 |
and if necessary complete handling an extraction by setting the offsets and |
and if necessary complete handling an extraction by setting the offsets and |
| 1661 |
bumping the high water mark. Note that whole-pattern recursion is coded as |
bumping the high water mark. Whole-pattern recursion is coded as a recurse |
| 1662 |
a recurse into group 0, so it won't be picked up here. Instead, we catch it |
into group 0, so it won't be picked up here. Instead, we catch it when the |
| 1663 |
when the OP_END is reached. Other recursion is handled here. */ |
OP_END is reached. Other recursion is handled here. We just have to record |
| 1664 |
|
the current subject position and start match pointer and give a MATCH |
| 1665 |
|
return. */ |
| 1666 |
|
|
| 1667 |
if (*prev == OP_CBRA || *prev == OP_SCBRA || |
if (*prev == OP_CBRA || *prev == OP_SCBRA || |
| 1668 |
*prev == OP_CBRAPOS || *prev == OP_SCBRAPOS) |
*prev == OP_CBRAPOS || *prev == OP_SCBRAPOS) |
| 1675 |
printf("\n"); |
printf("\n"); |
| 1676 |
#endif |
#endif |
| 1677 |
|
|
| 1678 |
|
/* Handle a recursively called group. */ |
| 1679 |
|
|
| 1680 |
|
if (md->recursive != NULL && md->recursive->group_num == number) |
| 1681 |
|
{ |
| 1682 |
|
md->end_match_ptr = eptr; |
| 1683 |
|
md->start_match_ptr = mstart; |
| 1684 |
|
RRETURN(MATCH_MATCH); |
| 1685 |
|
} |
| 1686 |
|
|
| 1687 |
|
/* Deal with capturing */ |
| 1688 |
|
|
| 1689 |
md->capture_last = number; |
md->capture_last = number; |
| 1690 |
if (offset >= md->offset_max) md->offset_overflow = TRUE; else |
if (offset >= md->offset_max) md->offset_overflow = TRUE; else |
| 1691 |
{ |
{ |
| 1713 |
md->offset_vector[offset+1] = (int)(eptr - md->start_subject); |
md->offset_vector[offset+1] = (int)(eptr - md->start_subject); |
| 1714 |
if (offset_top <= offset) offset_top = offset + 2; |
if (offset_top <= offset) offset_top = offset + 2; |
| 1715 |
} |
} |
|
|
|
|
/* Handle a recursively called group. Restore the offsets |
|
|
appropriately and continue from after the call. */ |
|
|
|
|
|
if (md->recursive != NULL && md->recursive->group_num == number) |
|
|
{ |
|
|
recursion_info *rec = md->recursive; |
|
|
DPRINTF(("Recursion (%d) succeeded - continuing\n", number)); |
|
|
md->recursive = rec->prevrec; |
|
|
memcpy(md->offset_vector, rec->offset_save, |
|
|
rec->saved_max * sizeof(int)); |
|
|
offset_top = rec->save_offset_top; |
|
|
ecode = rec->after_call; |
|
|
break; |
|
|
} |
|
| 1716 |
} |
} |
| 1717 |
|
|
| 1718 |
/* For a non-repeating ket, just continue at this level. This also |
/* For an ordinary non-repeating ket, just continue at this level. This |
| 1719 |
happens for a repeating ket if no characters were matched in the group. |
also happens for a repeating ket if no characters were matched in the |
| 1720 |
This is the forcible breaking of infinite loops as implemented in Perl |
group. This is the forcible breaking of infinite loops as implemented in |
| 1721 |
5.005. If there is an options reset, it will get obeyed in the normal |
Perl 5.005. For a non-repeating atomic group, establish a backup point by |
| 1722 |
course of events. */ |
processing the rest of the pattern at a lower level. If this results in a |
| 1723 |
|
NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby |
| 1724 |
|
bypassing intermediate backup points, but resetting any captures that |
| 1725 |
|
happened along the way. */ |
| 1726 |
|
|
| 1727 |
if (*ecode == OP_KET || eptr == saved_eptr) |
if (*ecode == OP_KET || eptr == saved_eptr) |
| 1728 |
{ |
{ |
| 1729 |
ecode += 1 + LINK_SIZE; |
if (*prev == OP_ONCE) |
| 1730 |
|
{ |
| 1731 |
|
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12); |
| 1732 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 1733 |
|
md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */ |
| 1734 |
|
RRETURN(MATCH_ONCE); |
| 1735 |
|
} |
| 1736 |
|
ecode += 1 + LINK_SIZE; /* Carry on at this level */ |
| 1737 |
break; |
break; |
| 1738 |
} |
} |
| 1739 |
|
|
| 1751 |
/* The normal repeating kets try the rest of the pattern or restart from |
/* The normal repeating kets try the rest of the pattern or restart from |
| 1752 |
the preceding bracket, in the appropriate order. In the second case, we can |
the preceding bracket, in the appropriate order. In the second case, we can |
| 1753 |
use tail recursion to avoid using another stack frame, unless we have an |
use tail recursion to avoid using another stack frame, unless we have an |
| 1754 |
unlimited repeat of a group that can match an empty string. */ |
atomic group or an unlimited repeat of a group that can match an empty |
| 1755 |
|
string. */ |
| 1756 |
|
|
| 1757 |
if (*ecode == OP_KETRMIN) |
if (*ecode == OP_KETRMIN) |
| 1758 |
{ |
{ |
| 1759 |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM12); |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM7); |
| 1760 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 1761 |
|
if (*prev == OP_ONCE) |
| 1762 |
|
{ |
| 1763 |
|
RMATCH(eptr, prev, offset_top, md, eptrb, RM8); |
| 1764 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 1765 |
|
md->once_target = prev; /* Level at which to change to MATCH_NOMATCH */ |
| 1766 |
|
RRETURN(MATCH_ONCE); |
| 1767 |
|
} |
| 1768 |
if (*prev >= OP_SBRA) /* Could match an empty string */ |
if (*prev >= OP_SBRA) /* Could match an empty string */ |
| 1769 |
{ |
{ |
| 1770 |
md->match_function_type = MATCH_CBEGROUP; |
md->match_function_type = MATCH_CBEGROUP; |
| 1778 |
{ |
{ |
| 1779 |
if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
if (*prev >= OP_SBRA) md->match_function_type = MATCH_CBEGROUP; |
| 1780 |
RMATCH(eptr, prev, offset_top, md, eptrb, RM13); |
RMATCH(eptr, prev, offset_top, md, eptrb, RM13); |
| 1781 |
|
if (rrc == MATCH_ONCE && md->once_target == prev) rrc = MATCH_NOMATCH; |
| 1782 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 1783 |
|
if (*prev == OP_ONCE) |
| 1784 |
|
{ |
| 1785 |
|
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM9); |
| 1786 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 1787 |
|
md->once_target = prev; |
| 1788 |
|
RRETURN(MATCH_ONCE); |
| 1789 |
|
} |
| 1790 |
ecode += 1 + LINK_SIZE; |
ecode += 1 + LINK_SIZE; |
| 1791 |
goto TAIL_RECURSE; |
goto TAIL_RECURSE; |
| 1792 |
} |
} |
| 2355 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 2356 |
} |
} |
| 2357 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 2358 |
|
if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); |
| 2359 |
|
while (eptr < md->end_subject) |
| 2360 |
{ |
{ |
| 2361 |
int category = UCD_CATEGORY(c); |
int len = 1; |
| 2362 |
if (category == ucp_M) MRRETURN(MATCH_NOMATCH); |
if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
| 2363 |
while (eptr < md->end_subject) |
if (UCD_CATEGORY(c) != ucp_M) break; |
| 2364 |
{ |
eptr += len; |
|
int len = 1; |
|
|
if (!utf8) c = *eptr; else |
|
|
{ |
|
|
GETCHARLEN(c, eptr, len); |
|
|
} |
|
|
category = UCD_CATEGORY(c); |
|
|
if (category != ucp_M) break; |
|
|
eptr += len; |
|
|
} |
|
| 2365 |
} |
} |
| 2366 |
ecode++; |
ecode++; |
| 2367 |
break; |
break; |
| 3715 |
case PT_LAMP: |
case PT_LAMP: |
| 3716 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 3717 |
{ |
{ |
| 3718 |
|
int chartype; |
| 3719 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 3720 |
{ |
{ |
| 3721 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
| 3722 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3723 |
} |
} |
| 3724 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 3725 |
prop_chartype = UCD_CHARTYPE(c); |
chartype = UCD_CHARTYPE(c); |
| 3726 |
if ((prop_chartype == ucp_Lu || |
if ((chartype == ucp_Lu || |
| 3727 |
prop_chartype == ucp_Ll || |
chartype == ucp_Ll || |
| 3728 |
prop_chartype == ucp_Lt) == prop_fail_result) |
chartype == ucp_Lt) == prop_fail_result) |
| 3729 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3730 |
} |
} |
| 3731 |
break; |
break; |
| 3739 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3740 |
} |
} |
| 3741 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 3742 |
prop_category = UCD_CATEGORY(c); |
if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) |
|
if ((prop_category == prop_value) == prop_fail_result) |
|
| 3743 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3744 |
} |
} |
| 3745 |
break; |
break; |
| 3753 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3754 |
} |
} |
| 3755 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 3756 |
prop_chartype = UCD_CHARTYPE(c); |
if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) |
|
if ((prop_chartype == prop_value) == prop_fail_result) |
|
| 3757 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3758 |
} |
} |
| 3759 |
break; |
break; |
| 3767 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3768 |
} |
} |
| 3769 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 3770 |
prop_script = UCD_SCRIPT(c); |
if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) |
|
if ((prop_script == prop_value) == prop_fail_result) |
|
| 3771 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3772 |
} |
} |
| 3773 |
break; |
break; |
| 3775 |
case PT_ALNUM: |
case PT_ALNUM: |
| 3776 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 3777 |
{ |
{ |
| 3778 |
|
int category; |
| 3779 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 3780 |
{ |
{ |
| 3781 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
| 3782 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3783 |
} |
} |
| 3784 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 3785 |
prop_category = UCD_CATEGORY(c); |
category = UCD_CATEGORY(c); |
| 3786 |
if ((prop_category == ucp_L || prop_category == ucp_N) |
if ((category == ucp_L || category == ucp_N) == prop_fail_result) |
|
== prop_fail_result) |
|
| 3787 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3788 |
} |
} |
| 3789 |
break; |
break; |
| 3797 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3798 |
} |
} |
| 3799 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 3800 |
prop_category = UCD_CATEGORY(c); |
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
| 3801 |
c == CHAR_FF || c == CHAR_CR) |
c == CHAR_FF || c == CHAR_CR) |
| 3802 |
== prop_fail_result) |
== prop_fail_result) |
| 3803 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3813 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3814 |
} |
} |
| 3815 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 3816 |
prop_category = UCD_CATEGORY(c); |
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
| 3817 |
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
| 3818 |
== prop_fail_result) |
== prop_fail_result) |
| 3819 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3823 |
case PT_WORD: |
case PT_WORD: |
| 3824 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 3825 |
{ |
{ |
| 3826 |
|
int category; |
| 3827 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 3828 |
{ |
{ |
| 3829 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
| 3830 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3831 |
} |
} |
| 3832 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 3833 |
prop_category = UCD_CATEGORY(c); |
category = UCD_CATEGORY(c); |
| 3834 |
if ((prop_category == ucp_L || prop_category == ucp_N || |
if ((category == ucp_L || category == ucp_N || c == CHAR_UNDERSCORE) |
|
c == CHAR_UNDERSCORE) |
|
| 3835 |
== prop_fail_result) |
== prop_fail_result) |
| 3836 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3837 |
} |
} |
| 3857 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 3858 |
} |
} |
| 3859 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 3860 |
prop_category = UCD_CATEGORY(c); |
if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); |
|
if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH); |
|
| 3861 |
while (eptr < md->end_subject) |
while (eptr < md->end_subject) |
| 3862 |
{ |
{ |
| 3863 |
int len = 1; |
int len = 1; |
| 3864 |
if (!utf8) c = *eptr; |
if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
| 3865 |
else { GETCHARLEN(c, eptr, len); } |
if (UCD_CATEGORY(c) != ucp_M) break; |
|
prop_category = UCD_CATEGORY(c); |
|
|
if (prop_category != ucp_M) break; |
|
| 3866 |
eptr += len; |
eptr += len; |
| 3867 |
} |
} |
| 3868 |
} |
} |
| 4407 |
case PT_LAMP: |
case PT_LAMP: |
| 4408 |
for (fi = min;; fi++) |
for (fi = min;; fi++) |
| 4409 |
{ |
{ |
| 4410 |
|
int chartype; |
| 4411 |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM37); |
| 4412 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 4413 |
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
| 4417 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4418 |
} |
} |
| 4419 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 4420 |
prop_chartype = UCD_CHARTYPE(c); |
chartype = UCD_CHARTYPE(c); |
| 4421 |
if ((prop_chartype == ucp_Lu || |
if ((chartype == ucp_Lu || |
| 4422 |
prop_chartype == ucp_Ll || |
chartype == ucp_Ll || |
| 4423 |
prop_chartype == ucp_Lt) == prop_fail_result) |
chartype == ucp_Lt) == prop_fail_result) |
| 4424 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4425 |
} |
} |
| 4426 |
/* Control never gets here */ |
/* Control never gets here */ |
| 4437 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4438 |
} |
} |
| 4439 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 4440 |
prop_category = UCD_CATEGORY(c); |
if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) |
|
if ((prop_category == prop_value) == prop_fail_result) |
|
| 4441 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4442 |
} |
} |
| 4443 |
/* Control never gets here */ |
/* Control never gets here */ |
| 4454 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4455 |
} |
} |
| 4456 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 4457 |
prop_chartype = UCD_CHARTYPE(c); |
if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) |
|
if ((prop_chartype == prop_value) == prop_fail_result) |
|
| 4458 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4459 |
} |
} |
| 4460 |
/* Control never gets here */ |
/* Control never gets here */ |
| 4471 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4472 |
} |
} |
| 4473 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 4474 |
prop_script = UCD_SCRIPT(c); |
if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) |
|
if ((prop_script == prop_value) == prop_fail_result) |
|
| 4475 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4476 |
} |
} |
| 4477 |
/* Control never gets here */ |
/* Control never gets here */ |
| 4479 |
case PT_ALNUM: |
case PT_ALNUM: |
| 4480 |
for (fi = min;; fi++) |
for (fi = min;; fi++) |
| 4481 |
{ |
{ |
| 4482 |
|
int category; |
| 4483 |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM59); |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM59); |
| 4484 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 4485 |
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
| 4489 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4490 |
} |
} |
| 4491 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 4492 |
prop_category = UCD_CATEGORY(c); |
category = UCD_CATEGORY(c); |
| 4493 |
if ((prop_category == ucp_L || prop_category == ucp_N) |
if ((category == ucp_L || category == ucp_N) == prop_fail_result) |
|
== prop_fail_result) |
|
| 4494 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4495 |
} |
} |
| 4496 |
/* Control never gets here */ |
/* Control never gets here */ |
| 4507 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4508 |
} |
} |
| 4509 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 4510 |
prop_category = UCD_CATEGORY(c); |
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
| 4511 |
c == CHAR_FF || c == CHAR_CR) |
c == CHAR_FF || c == CHAR_CR) |
| 4512 |
== prop_fail_result) |
== prop_fail_result) |
| 4513 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4526 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4527 |
} |
} |
| 4528 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 4529 |
prop_category = UCD_CATEGORY(c); |
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
| 4530 |
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
| 4531 |
== prop_fail_result) |
== prop_fail_result) |
| 4532 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4536 |
case PT_WORD: |
case PT_WORD: |
| 4537 |
for (fi = min;; fi++) |
for (fi = min;; fi++) |
| 4538 |
{ |
{ |
| 4539 |
|
int category; |
| 4540 |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM62); |
RMATCH(eptr, ecode, offset_top, md, eptrb, RM62); |
| 4541 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 4542 |
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
if (fi >= max) MRRETURN(MATCH_NOMATCH); |
| 4546 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4547 |
} |
} |
| 4548 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 4549 |
prop_category = UCD_CATEGORY(c); |
category = UCD_CATEGORY(c); |
| 4550 |
if ((prop_category == ucp_L || |
if ((category == ucp_L || |
| 4551 |
prop_category == ucp_N || |
category == ucp_N || |
| 4552 |
c == CHAR_UNDERSCORE) |
c == CHAR_UNDERSCORE) |
| 4553 |
== prop_fail_result) |
== prop_fail_result) |
| 4554 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4578 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 4579 |
} |
} |
| 4580 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 4581 |
prop_category = UCD_CATEGORY(c); |
if (UCD_CATEGORY(c) == ucp_M) MRRETURN(MATCH_NOMATCH); |
|
if (prop_category == ucp_M) MRRETURN(MATCH_NOMATCH); |
|
| 4582 |
while (eptr < md->end_subject) |
while (eptr < md->end_subject) |
| 4583 |
{ |
{ |
| 4584 |
int len = 1; |
int len = 1; |
| 4585 |
if (!utf8) c = *eptr; |
if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
| 4586 |
else { GETCHARLEN(c, eptr, len); } |
if (UCD_CATEGORY(c) != ucp_M) break; |
|
prop_category = UCD_CATEGORY(c); |
|
|
if (prop_category != ucp_M) break; |
|
| 4587 |
eptr += len; |
eptr += len; |
| 4588 |
} |
} |
| 4589 |
} |
} |
| 4590 |
} |
} |
|
|
|
| 4591 |
else |
else |
| 4592 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
| 4593 |
|
|
| 4908 |
case PT_LAMP: |
case PT_LAMP: |
| 4909 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 4910 |
{ |
{ |
| 4911 |
|
int chartype; |
| 4912 |
int len = 1; |
int len = 1; |
| 4913 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 4914 |
{ |
{ |
| 4916 |
break; |
break; |
| 4917 |
} |
} |
| 4918 |
GETCHARLENTEST(c, eptr, len); |
GETCHARLENTEST(c, eptr, len); |
| 4919 |
prop_chartype = UCD_CHARTYPE(c); |
chartype = UCD_CHARTYPE(c); |
| 4920 |
if ((prop_chartype == ucp_Lu || |
if ((chartype == ucp_Lu || |
| 4921 |
prop_chartype == ucp_Ll || |
chartype == ucp_Ll || |
| 4922 |
prop_chartype == ucp_Lt) == prop_fail_result) |
chartype == ucp_Lt) == prop_fail_result) |
| 4923 |
break; |
break; |
| 4924 |
eptr+= len; |
eptr+= len; |
| 4925 |
} |
} |
| 4935 |
break; |
break; |
| 4936 |
} |
} |
| 4937 |
GETCHARLENTEST(c, eptr, len); |
GETCHARLENTEST(c, eptr, len); |
| 4938 |
prop_category = UCD_CATEGORY(c); |
if ((UCD_CATEGORY(c) == prop_value) == prop_fail_result) break; |
|
if ((prop_category == prop_value) == prop_fail_result) |
|
|
break; |
|
| 4939 |
eptr+= len; |
eptr+= len; |
| 4940 |
} |
} |
| 4941 |
break; |
break; |
| 4950 |
break; |
break; |
| 4951 |
} |
} |
| 4952 |
GETCHARLENTEST(c, eptr, len); |
GETCHARLENTEST(c, eptr, len); |
| 4953 |
prop_chartype = UCD_CHARTYPE(c); |
if ((UCD_CHARTYPE(c) == prop_value) == prop_fail_result) break; |
|
if ((prop_chartype == prop_value) == prop_fail_result) |
|
|
break; |
|
| 4954 |
eptr+= len; |
eptr+= len; |
| 4955 |
} |
} |
| 4956 |
break; |
break; |
| 4965 |
break; |
break; |
| 4966 |
} |
} |
| 4967 |
GETCHARLENTEST(c, eptr, len); |
GETCHARLENTEST(c, eptr, len); |
| 4968 |
prop_script = UCD_SCRIPT(c); |
if ((UCD_SCRIPT(c) == prop_value) == prop_fail_result) break; |
|
if ((prop_script == prop_value) == prop_fail_result) |
|
|
break; |
|
| 4969 |
eptr+= len; |
eptr+= len; |
| 4970 |
} |
} |
| 4971 |
break; |
break; |
| 4973 |
case PT_ALNUM: |
case PT_ALNUM: |
| 4974 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 4975 |
{ |
{ |
| 4976 |
|
int category; |
| 4977 |
int len = 1; |
int len = 1; |
| 4978 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 4979 |
{ |
{ |
| 4981 |
break; |
break; |
| 4982 |
} |
} |
| 4983 |
GETCHARLENTEST(c, eptr, len); |
GETCHARLENTEST(c, eptr, len); |
| 4984 |
prop_category = UCD_CATEGORY(c); |
category = UCD_CATEGORY(c); |
| 4985 |
if ((prop_category == ucp_L || prop_category == ucp_N) |
if ((category == ucp_L || category == ucp_N) == prop_fail_result) |
|
== prop_fail_result) |
|
| 4986 |
break; |
break; |
| 4987 |
eptr+= len; |
eptr+= len; |
| 4988 |
} |
} |
| 4998 |
break; |
break; |
| 4999 |
} |
} |
| 5000 |
GETCHARLENTEST(c, eptr, len); |
GETCHARLENTEST(c, eptr, len); |
| 5001 |
prop_category = UCD_CATEGORY(c); |
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
| 5002 |
c == CHAR_FF || c == CHAR_CR) |
c == CHAR_FF || c == CHAR_CR) |
| 5003 |
== prop_fail_result) |
== prop_fail_result) |
| 5004 |
break; |
break; |
| 5016 |
break; |
break; |
| 5017 |
} |
} |
| 5018 |
GETCHARLENTEST(c, eptr, len); |
GETCHARLENTEST(c, eptr, len); |
| 5019 |
prop_category = UCD_CATEGORY(c); |
if ((UCD_CATEGORY(c) == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
if ((prop_category == ucp_Z || c == CHAR_HT || c == CHAR_NL || |
|
| 5020 |
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
c == CHAR_VT || c == CHAR_FF || c == CHAR_CR) |
| 5021 |
== prop_fail_result) |
== prop_fail_result) |
| 5022 |
break; |
break; |
| 5027 |
case PT_WORD: |
case PT_WORD: |
| 5028 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 5029 |
{ |
{ |
| 5030 |
|
int category; |
| 5031 |
int len = 1; |
int len = 1; |
| 5032 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 5033 |
{ |
{ |
| 5035 |
break; |
break; |
| 5036 |
} |
} |
| 5037 |
GETCHARLENTEST(c, eptr, len); |
GETCHARLENTEST(c, eptr, len); |
| 5038 |
prop_category = UCD_CATEGORY(c); |
category = UCD_CATEGORY(c); |
| 5039 |
if ((prop_category == ucp_L || prop_category == ucp_N || |
if ((category == ucp_L || category == ucp_N || |
| 5040 |
c == CHAR_UNDERSCORE) == prop_fail_result) |
c == CHAR_UNDERSCORE) == prop_fail_result) |
| 5041 |
break; |
break; |
| 5042 |
eptr+= len; |
eptr+= len; |
| 5066 |
{ |
{ |
| 5067 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 5068 |
{ |
{ |
| 5069 |
|
int len = 1; |
| 5070 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 5071 |
{ |
{ |
| 5072 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
| 5073 |
break; |
break; |
| 5074 |
} |
} |
| 5075 |
GETCHARINCTEST(c, eptr); |
if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
| 5076 |
prop_category = UCD_CATEGORY(c); |
if (UCD_CATEGORY(c) == ucp_M) break; |
| 5077 |
if (prop_category == ucp_M) break; |
eptr += len; |
| 5078 |
while (eptr < md->end_subject) |
while (eptr < md->end_subject) |
| 5079 |
{ |
{ |
| 5080 |
int len = 1; |
len = 1; |
| 5081 |
if (!utf8) c = *eptr; else |
if (!utf8) c = *eptr; else { GETCHARLEN(c, eptr, len); } |
| 5082 |
{ |
if (UCD_CATEGORY(c) != ucp_M) break; |
|
GETCHARLEN(c, eptr, len); |
|
|
} |
|
|
prop_category = UCD_CATEGORY(c); |
|
|
if (prop_category != ucp_M) break; |
|
| 5083 |
eptr += len; |
eptr += len; |
| 5084 |
} |
} |
| 5085 |
} |
} |
| 5095 |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
| 5096 |
for (;;) /* Move back over one extended */ |
for (;;) /* Move back over one extended */ |
| 5097 |
{ |
{ |
|
int len = 1; |
|
| 5098 |
if (!utf8) c = *eptr; else |
if (!utf8) c = *eptr; else |
| 5099 |
{ |
{ |
| 5100 |
BACKCHAR(eptr); |
BACKCHAR(eptr); |
| 5101 |
GETCHARLEN(c, eptr, len); |
GETCHAR(c, eptr); |
| 5102 |
} |
} |
| 5103 |
prop_category = UCD_CATEGORY(c); |
if (UCD_CATEGORY(c) != ucp_M) break; |
|
if (prop_category != ucp_M) break; |
|
| 5104 |
eptr--; |
eptr--; |
| 5105 |
} |
} |
| 5106 |
} |
} |
| 5637 |
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17) |
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17) |
| 5638 |
LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33) |
LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33) |
| 5639 |
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) |
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) |
| 5640 |
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) |
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) |
| 5641 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 5642 |
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30) |
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30) |
| 5643 |
LBL(32) LBL(34) LBL(42) LBL(46) |
LBL(32) LBL(34) LBL(42) LBL(46) |