| 277 |
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, |
RM31, RM32, RM33, RM34, RM35, RM36, RM37, RM38, RM39, RM40, |
| 278 |
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, |
RM41, RM42, RM43, RM44, RM45, RM46, RM47, RM48, RM49, RM50, |
| 279 |
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, |
RM51, RM52, RM53, RM54, RM55, RM56, RM57, RM58, RM59, RM60, |
| 280 |
RM61, RM62, RM63 }; |
RM61, RM62, RM63, RM64, RM65, RM66 }; |
| 281 |
|
|
| 282 |
/* These versions of the macros use the stack, as normal. There are debugging |
/* These versions of the macros use the stack, as normal. There are debugging |
| 283 |
versions and production versions. Note that the "rw" argument of RMATCH isn't |
versions and production versions. Note that the "rw" argument of RMATCH isn't |
| 775 |
md->start_match_ptr = ecode + 2; |
md->start_match_ptr = ecode + 2; |
| 776 |
RRETURN(MATCH_SKIP_ARG); |
RRETURN(MATCH_SKIP_ARG); |
| 777 |
|
|
| 778 |
/* For THEN (and THEN_ARG) we pass back the address of the bracket or |
/* For THEN (and THEN_ARG) we pass back the address of the opcode, so that |
| 779 |
the alt that is at the start of the current branch. This makes it possible |
the branch in which it occurs can be determined. Overload the start of |
| 780 |
to skip back past alternatives that precede the THEN within the current |
match pointer to do this. */ |
|
branch. */ |
|
| 781 |
|
|
| 782 |
case OP_THEN: |
case OP_THEN: |
| 783 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
| 784 |
eptrb, RM54); |
eptrb, RM54); |
| 785 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 786 |
md->start_match_ptr = ecode - GET(ecode, 1); |
md->start_match_ptr = ecode; |
| 787 |
MRRETURN(MATCH_THEN); |
MRRETURN(MATCH_THEN); |
| 788 |
|
|
| 789 |
case OP_THEN_ARG: |
case OP_THEN_ARG: |
| 790 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1+LINK_SIZE], |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode] + ecode[1], offset_top, |
| 791 |
offset_top, md, eptrb, RM58); |
md, eptrb, RM58); |
| 792 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 793 |
md->start_match_ptr = ecode - GET(ecode, 1); |
md->start_match_ptr = ecode; |
| 794 |
md->mark = ecode + LINK_SIZE + 2; |
md->mark = ecode + 2; |
| 795 |
RRETURN(MATCH_THEN); |
RRETURN(MATCH_THEN); |
| 796 |
|
|
| 797 |
|
/* Handle an atomic group that does not contain any capturing parentheses. |
| 798 |
|
This can be handled like an assertion. Prior to 8.13, all atomic groups |
| 799 |
|
were handled this way. In 8.13, the code was changed as below for ONCE, so |
| 800 |
|
that backups pass through the group and thereby reset captured values. |
| 801 |
|
However, this uses a lot more stack, so in 8.20, atomic groups that do not |
| 802 |
|
contain any captures generate OP_ONCE_NC, which can be handled in the old, |
| 803 |
|
less stack intensive way. |
| 804 |
|
|
| 805 |
|
Check the alternative branches in turn - the matching won't pass the KET |
| 806 |
|
for this kind of subpattern. If any one branch matches, we carry on as at |
| 807 |
|
the end of a normal bracket, leaving the subject pointer, but resetting |
| 808 |
|
the start-of-match value in case it was changed by \K. */ |
| 809 |
|
|
| 810 |
|
case OP_ONCE_NC: |
| 811 |
|
prev = ecode; |
| 812 |
|
saved_eptr = eptr; |
| 813 |
|
do |
| 814 |
|
{ |
| 815 |
|
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM64); |
| 816 |
|
if (rrc == MATCH_MATCH) /* Note: _not_ MATCH_ACCEPT */ |
| 817 |
|
{ |
| 818 |
|
mstart = md->start_match_ptr; |
| 819 |
|
markptr = md->mark; |
| 820 |
|
break; |
| 821 |
|
} |
| 822 |
|
if (rrc == MATCH_THEN) |
| 823 |
|
{ |
| 824 |
|
next = ecode + GET(ecode,1); |
| 825 |
|
if (md->start_match_ptr < next && |
| 826 |
|
(*ecode == OP_ALT || *next == OP_ALT)) |
| 827 |
|
rrc = MATCH_NOMATCH; |
| 828 |
|
} |
| 829 |
|
|
| 830 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 831 |
|
ecode += GET(ecode,1); |
| 832 |
|
} |
| 833 |
|
while (*ecode == OP_ALT); |
| 834 |
|
|
| 835 |
|
/* If we hit the end of the group (which could be repeated), fail */ |
| 836 |
|
|
| 837 |
|
if (*ecode != OP_ONCE_NC && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH); |
| 838 |
|
|
| 839 |
|
/* Continue as from after the group, updating the offsets high water |
| 840 |
|
mark, since extracts may have been taken. */ |
| 841 |
|
|
| 842 |
|
do ecode += GET(ecode, 1); while (*ecode == OP_ALT); |
| 843 |
|
|
| 844 |
|
offset_top = md->end_offset_top; |
| 845 |
|
eptr = md->end_match_ptr; |
| 846 |
|
|
| 847 |
|
/* For a non-repeating ket, just continue at this level. This also |
| 848 |
|
happens for a repeating ket if no characters were matched in the group. |
| 849 |
|
This is the forcible breaking of infinite loops as implemented in Perl |
| 850 |
|
5.005. */ |
| 851 |
|
|
| 852 |
|
if (*ecode == OP_KET || eptr == saved_eptr) |
| 853 |
|
{ |
| 854 |
|
ecode += 1+LINK_SIZE; |
| 855 |
|
break; |
| 856 |
|
} |
| 857 |
|
|
| 858 |
|
/* The repeating kets try the rest of the pattern or restart from the |
| 859 |
|
preceding bracket, in the appropriate order. The second "call" of match() |
| 860 |
|
uses tail recursion, to avoid using another stack frame. */ |
| 861 |
|
|
| 862 |
|
if (*ecode == OP_KETRMIN) |
| 863 |
|
{ |
| 864 |
|
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM65); |
| 865 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 866 |
|
ecode = prev; |
| 867 |
|
goto TAIL_RECURSE; |
| 868 |
|
} |
| 869 |
|
else /* OP_KETRMAX */ |
| 870 |
|
{ |
| 871 |
|
md->match_function_type = MATCH_CBEGROUP; |
| 872 |
|
RMATCH(eptr, prev, offset_top, md, eptrb, RM66); |
| 873 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 874 |
|
ecode += 1 + LINK_SIZE; |
| 875 |
|
goto TAIL_RECURSE; |
| 876 |
|
} |
| 877 |
|
/* Control never gets here */ |
| 878 |
|
|
| 879 |
/* Handle a capturing bracket, other than those that are possessive with an |
/* Handle a capturing bracket, other than those that are possessive with an |
| 880 |
unlimited repeat. If there is space in the offset vector, save the current |
unlimited repeat. If there is space in the offset vector, save the current |
| 881 |
subject position in the working slot at the top of the vector. We mustn't |
subject position in the working slot at the top of the vector. We mustn't |
| 919 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, |
| 920 |
eptrb, RM1); |
eptrb, RM1); |
| 921 |
if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ |
if (rrc == MATCH_ONCE) break; /* Backing up through an atomic group */ |
| 922 |
if (rrc != MATCH_NOMATCH && |
|
| 923 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
/* If we backed up to a THEN, check whether it is within the current |
| 924 |
RRETURN(rrc); |
branch by comparing the address of the THEN that is passed back with |
| 925 |
|
the end of the branch. If it is within the current branch, and the |
| 926 |
|
branch is one of two or more alternatives (it either starts or ends |
| 927 |
|
with OP_ALT), we have reached the limit of THEN's action, so convert |
| 928 |
|
the return code to NOMATCH, which will cause normal backtracking to |
| 929 |
|
happen from now on. Otherwise, THEN is passed back to an outer |
| 930 |
|
alternative. This implements Perl's treatment of parenthesized groups, |
| 931 |
|
where a group not containing | does not affect the current alternative, |
| 932 |
|
that is, (X) is NOT the same as (X|(*F)). */ |
| 933 |
|
|
| 934 |
|
if (rrc == MATCH_THEN) |
| 935 |
|
{ |
| 936 |
|
next = ecode + GET(ecode,1); |
| 937 |
|
if (md->start_match_ptr < next && |
| 938 |
|
(*ecode == OP_ALT || *next == OP_ALT)) |
| 939 |
|
rrc = MATCH_NOMATCH; |
| 940 |
|
} |
| 941 |
|
|
| 942 |
|
/* Anything other than NOMATCH is passed back. */ |
| 943 |
|
|
| 944 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 945 |
md->capture_last = save_capture_last; |
md->capture_last = save_capture_last; |
| 946 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 947 |
if (*ecode != OP_ALT) break; |
if (*ecode != OP_ALT) break; |
| 952 |
md->offset_vector[offset+1] = save_offset2; |
md->offset_vector[offset+1] = save_offset2; |
| 953 |
md->offset_vector[md->offset_end - number] = save_offset3; |
md->offset_vector[md->offset_end - number] = save_offset3; |
| 954 |
|
|
| 955 |
/* At this point, rrc will be one of MATCH_ONCE, MATCH_NOMATCH, or |
/* At this point, rrc will be one of MATCH_ONCE or MATCH_NOMATCH. */ |
|
MATCH_THEN. */ |
|
| 956 |
|
|
| 957 |
if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr; |
if (md->mark == NULL) md->mark = markptr; |
| 958 |
RRETURN(((rrc == MATCH_ONCE)? MATCH_ONCE:MATCH_NOMATCH)); |
RRETURN(rrc); |
| 959 |
} |
} |
| 960 |
|
|
| 961 |
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat |
/* FALL THROUGH ... Insufficient room for saving captured contents. Treat |
| 970 |
/* VVVVVVVVVVVVVVVVVVVVVVVVV */ |
/* VVVVVVVVVVVVVVVVVVVVVVVVV */ |
| 971 |
|
|
| 972 |
/* Non-capturing or atomic group, except for possessive with unlimited |
/* Non-capturing or atomic group, except for possessive with unlimited |
| 973 |
repeat. Loop for all the alternatives. When we get to the final alternative |
repeat and ONCE group with no captures. Loop for all the alternatives. |
| 974 |
within the brackets, we used to return the result of a recursive call to |
|
| 975 |
match() whatever happened so it was possible to reduce stack usage by |
When we get to the final alternative within the brackets, we used to return |
| 976 |
turning this into a tail recursion, except in the case of a possibly empty |
the result of a recursive call to match() whatever happened so it was |
| 977 |
group. However, now that there is the possibility of (*THEN) occurring in |
possible to reduce stack usage by turning this into a tail recursion, |
| 978 |
the final alternative, this optimization is no longer possible. |
except in the case of a possibly empty group. However, now that there is |
| 979 |
|
the possibility of (*THEN) occurring in the final alternative, this |
| 980 |
|
optimization is no longer always possible. |
| 981 |
|
|
| 982 |
|
We can optimize if we know there are no (*THEN)s in the pattern; at present |
| 983 |
|
this is the best that can be done. |
| 984 |
|
|
| 985 |
MATCH_ONCE is returned when the end of an atomic group is successfully |
MATCH_ONCE is returned when the end of an atomic group is successfully |
| 986 |
reached, but subsequent matching fails. It passes back up the tree (causing |
reached, but subsequent matching fails. It passes back up the tree (causing |
| 997 |
for (;;) |
for (;;) |
| 998 |
{ |
{ |
| 999 |
if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP; |
if (op >= OP_SBRA || op == OP_ONCE) md->match_function_type = MATCH_CBEGROUP; |
| 1000 |
|
|
| 1001 |
|
/* If this is not a possibly empty group, and there are no (*THEN)s in |
| 1002 |
|
the pattern, and this is the final alternative, optimize as described |
| 1003 |
|
above. */ |
| 1004 |
|
|
| 1005 |
|
else if (!md->hasthen && ecode[GET(ecode, 1)] != OP_ALT) |
| 1006 |
|
{ |
| 1007 |
|
ecode += _pcre_OP_lengths[*ecode]; |
| 1008 |
|
goto TAIL_RECURSE; |
| 1009 |
|
} |
| 1010 |
|
|
| 1011 |
|
/* In all other cases, we have to make another call to match(). */ |
| 1012 |
|
|
| 1013 |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb, |
RMATCH(eptr, ecode + _pcre_OP_lengths[*ecode], offset_top, md, eptrb, |
| 1014 |
RM2); |
RM2); |
| 1015 |
if (rrc != MATCH_NOMATCH && |
|
| 1016 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
/* See comment in the code for capturing groups above about handling |
| 1017 |
|
THEN. */ |
| 1018 |
|
|
| 1019 |
|
if (rrc == MATCH_THEN) |
| 1020 |
|
{ |
| 1021 |
|
next = ecode + GET(ecode,1); |
| 1022 |
|
if (md->start_match_ptr < next && |
| 1023 |
|
(*ecode == OP_ALT || *next == OP_ALT)) |
| 1024 |
|
rrc = MATCH_NOMATCH; |
| 1025 |
|
} |
| 1026 |
|
|
| 1027 |
|
if (rrc != MATCH_NOMATCH) |
| 1028 |
{ |
{ |
| 1029 |
if (rrc == MATCH_ONCE) |
if (rrc == MATCH_ONCE) |
| 1030 |
{ |
{ |
| 1041 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 1042 |
if (*ecode != OP_ALT) break; |
if (*ecode != OP_ALT) break; |
| 1043 |
} |
} |
| 1044 |
if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr; |
|
| 1045 |
|
if (md->mark == NULL) md->mark = markptr; |
| 1046 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
| 1047 |
|
|
| 1048 |
/* Handle possessive capturing brackets with an unlimited repeat. We come |
/* Handle possessive capturing brackets with an unlimited repeat. We come |
| 1105 |
matched_once = TRUE; |
matched_once = TRUE; |
| 1106 |
continue; |
continue; |
| 1107 |
} |
} |
| 1108 |
if (rrc != MATCH_NOMATCH && |
|
| 1109 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
/* See comment in the code for capturing groups above about handling |
| 1110 |
RRETURN(rrc); |
THEN. */ |
| 1111 |
|
|
| 1112 |
|
if (rrc == MATCH_THEN) |
| 1113 |
|
{ |
| 1114 |
|
next = ecode + GET(ecode,1); |
| 1115 |
|
if (md->start_match_ptr < next && |
| 1116 |
|
(*ecode == OP_ALT || *next == OP_ALT)) |
| 1117 |
|
rrc = MATCH_NOMATCH; |
| 1118 |
|
} |
| 1119 |
|
|
| 1120 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 1121 |
md->capture_last = save_capture_last; |
md->capture_last = save_capture_last; |
| 1122 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 1123 |
if (*ecode != OP_ALT) break; |
if (*ecode != OP_ALT) break; |
| 1130 |
md->offset_vector[md->offset_end - number] = save_offset3; |
md->offset_vector[md->offset_end - number] = save_offset3; |
| 1131 |
} |
} |
| 1132 |
|
|
| 1133 |
if (rrc != MATCH_THEN && md->mark == NULL) md->mark = markptr; |
if (md->mark == NULL) md->mark = markptr; |
| 1134 |
if (allow_zero || matched_once) |
if (allow_zero || matched_once) |
| 1135 |
{ |
{ |
| 1136 |
ecode += 1 + LINK_SIZE; |
ecode += 1 + LINK_SIZE; |
| 1177 |
matched_once = TRUE; |
matched_once = TRUE; |
| 1178 |
continue; |
continue; |
| 1179 |
} |
} |
| 1180 |
if (rrc != MATCH_NOMATCH && |
|
| 1181 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
/* See comment in the code for capturing groups above about handling |
| 1182 |
RRETURN(rrc); |
THEN. */ |
| 1183 |
|
|
| 1184 |
|
if (rrc == MATCH_THEN) |
| 1185 |
|
{ |
| 1186 |
|
next = ecode + GET(ecode,1); |
| 1187 |
|
if (md->start_match_ptr < next && |
| 1188 |
|
(*ecode == OP_ALT || *next == OP_ALT)) |
| 1189 |
|
rrc = MATCH_NOMATCH; |
| 1190 |
|
} |
| 1191 |
|
|
| 1192 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 1193 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 1194 |
if (*ecode != OP_ALT) break; |
if (*ecode != OP_ALT) break; |
| 1195 |
} |
} |
| 1220 |
if (pcre_callout != NULL) |
if (pcre_callout != NULL) |
| 1221 |
{ |
{ |
| 1222 |
pcre_callout_block cb; |
pcre_callout_block cb; |
| 1223 |
cb.version = 1; /* Version 1 of the callout block */ |
cb.version = 2; /* Version 2 of the callout block */ |
| 1224 |
cb.callout_number = ecode[LINK_SIZE+2]; |
cb.callout_number = ecode[LINK_SIZE+2]; |
| 1225 |
cb.offset_vector = md->offset_vector; |
cb.offset_vector = md->offset_vector; |
| 1226 |
cb.subject = (PCRE_SPTR)md->start_subject; |
cb.subject = (PCRE_SPTR)md->start_subject; |
| 1232 |
cb.capture_top = offset_top/2; |
cb.capture_top = offset_top/2; |
| 1233 |
cb.capture_last = md->capture_last; |
cb.capture_last = md->capture_last; |
| 1234 |
cb.callout_data = md->callout_data; |
cb.callout_data = md->callout_data; |
| 1235 |
|
cb.mark = markptr; |
| 1236 |
if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); |
if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); |
| 1237 |
if (rrc < 0) RRETURN(rrc); |
if (rrc < 0) RRETURN(rrc); |
| 1238 |
} |
} |
| 1253 |
else |
else |
| 1254 |
{ |
{ |
| 1255 |
int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ |
int recno = GET2(ecode, LINK_SIZE + 2); /* Recursion group number*/ |
| 1256 |
condition = (recno == RREF_ANY || recno == md->recursive->group_num); |
condition = (recno == RREF_ANY || recno == md->recursive->group_num); |
| 1257 |
|
|
| 1258 |
/* If the test is for recursion into a specific subpattern, and it is |
/* If the test is for recursion into a specific subpattern, and it is |
| 1259 |
false, but the test was set up by name, scan the table to see if the |
false, but the test was set up by name, scan the table to see if the |
| 1260 |
name refers to any other numbers, and test them. The condition is true |
name refers to any other numbers, and test them. The condition is true |
| 1261 |
if any one is set. */ |
if any one is set. */ |
| 1262 |
|
|
| 1263 |
if (!condition && condcode == OP_NRREF && recno != RREF_ANY) |
if (!condition && condcode == OP_NRREF) |
| 1264 |
{ |
{ |
| 1265 |
uschar *slotA = md->name_table; |
uschar *slotA = md->name_table; |
| 1266 |
for (i = 0; i < md->name_count; i++) |
for (i = 0; i < md->name_count; i++) |
| 1401 |
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); |
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE + 2); |
| 1402 |
while (*ecode == OP_ALT) ecode += GET(ecode, 1); |
while (*ecode == OP_ALT) ecode += GET(ecode, 1); |
| 1403 |
} |
} |
| 1404 |
else if (rrc != MATCH_NOMATCH && |
|
| 1405 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
/* PCRE doesn't allow the effect of (*THEN) to escape beyond an |
| 1406 |
|
assertion; it is therefore treated as NOMATCH. */ |
| 1407 |
|
|
| 1408 |
|
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) |
| 1409 |
{ |
{ |
| 1410 |
RRETURN(rrc); /* Need braces because of following else */ |
RRETURN(rrc); /* Need braces because of following else */ |
| 1411 |
} |
} |
| 1416 |
} |
} |
| 1417 |
} |
} |
| 1418 |
|
|
| 1419 |
/* We are now at the branch that is to be obeyed. As there is only one, |
/* We are now at the branch that is to be obeyed. As there is only one, can |
| 1420 |
we used to use tail recursion to avoid using another stack frame, except |
use tail recursion to avoid using another stack frame, except when there is |
| 1421 |
when there was unlimited repeat of a possibly empty group. However, that |
unlimited repeat of a possibly empty group. In the latter case, a recursive |
| 1422 |
strategy no longer works because of the possibility of (*THEN) being |
call to match() is always required, unless the second alternative doesn't |
| 1423 |
encountered in the branch. A recursive call to match() is always required, |
exist, in which case we can just plough on. Note that, for compatibility |
| 1424 |
unless the second alternative doesn't exist, in which case we can just |
with Perl, the | in a conditional group is NOT treated as creating two |
| 1425 |
plough on. */ |
alternatives. If a THEN is encountered in the branch, it propagates out to |
| 1426 |
|
the enclosing alternative (unless nested in a deeper set of alternatives, |
| 1427 |
|
of course). */ |
| 1428 |
|
|
| 1429 |
if (condition || *ecode == OP_ALT) |
if (condition || *ecode == OP_ALT) |
| 1430 |
{ |
{ |
| 1431 |
if (op == OP_SCOND) md->match_function_type = MATCH_CBEGROUP; |
if (op != OP_SCOND) |
| 1432 |
|
{ |
| 1433 |
|
ecode += 1 + LINK_SIZE; |
| 1434 |
|
goto TAIL_RECURSE; |
| 1435 |
|
} |
| 1436 |
|
|
| 1437 |
|
md->match_function_type = MATCH_CBEGROUP; |
| 1438 |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49); |
RMATCH(eptr, ecode + 1 + LINK_SIZE, offset_top, md, eptrb, RM49); |
|
if (rrc == MATCH_THEN && md->start_match_ptr == ecode) |
|
|
rrc = MATCH_NOMATCH; |
|
| 1439 |
RRETURN(rrc); |
RRETURN(rrc); |
| 1440 |
} |
} |
| 1441 |
else /* Condition false & no alternative */ |
|
| 1442 |
|
/* Condition false & no alternative; continue after the group. */ |
| 1443 |
|
|
| 1444 |
|
else |
| 1445 |
{ |
{ |
| 1446 |
ecode += 1 + LINK_SIZE; |
ecode += 1 + LINK_SIZE; |
| 1447 |
} |
} |
| 1531 |
markptr = md->mark; |
markptr = md->mark; |
| 1532 |
break; |
break; |
| 1533 |
} |
} |
| 1534 |
if (rrc != MATCH_NOMATCH && |
|
| 1535 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
/* PCRE does not allow THEN to escape beyond an assertion; it is treated |
| 1536 |
RRETURN(rrc); |
as NOMATCH. */ |
| 1537 |
|
|
| 1538 |
|
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
| 1539 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 1540 |
} |
} |
| 1541 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 1576 |
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
| 1577 |
break; |
break; |
| 1578 |
} |
} |
| 1579 |
if (rrc != MATCH_NOMATCH && |
|
| 1580 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
/* PCRE does not allow THEN to escape beyond an assertion; it is treated |
| 1581 |
RRETURN(rrc); |
as NOMATCH. */ |
| 1582 |
|
|
| 1583 |
|
if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) RRETURN(rrc); |
| 1584 |
ecode += GET(ecode,1); |
ecode += GET(ecode,1); |
| 1585 |
} |
} |
| 1586 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 1631 |
if (pcre_callout != NULL) |
if (pcre_callout != NULL) |
| 1632 |
{ |
{ |
| 1633 |
pcre_callout_block cb; |
pcre_callout_block cb; |
| 1634 |
cb.version = 1; /* Version 1 of the callout block */ |
cb.version = 2; /* Version 2 of the callout block */ |
| 1635 |
cb.callout_number = ecode[1]; |
cb.callout_number = ecode[1]; |
| 1636 |
cb.offset_vector = md->offset_vector; |
cb.offset_vector = md->offset_vector; |
| 1637 |
cb.subject = (PCRE_SPTR)md->start_subject; |
cb.subject = (PCRE_SPTR)md->start_subject; |
| 1643 |
cb.capture_top = offset_top/2; |
cb.capture_top = offset_top/2; |
| 1644 |
cb.capture_last = md->capture_last; |
cb.capture_last = md->capture_last; |
| 1645 |
cb.callout_data = md->callout_data; |
cb.callout_data = md->callout_data; |
| 1646 |
|
cb.mark = markptr; |
| 1647 |
if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); |
if ((rrc = (*pcre_callout)(&cb)) > 0) MRRETURN(MATCH_NOMATCH); |
| 1648 |
if (rrc < 0) RRETURN(rrc); |
if (rrc < 0) RRETURN(rrc); |
| 1649 |
} |
} |
| 1671 |
{ |
{ |
| 1672 |
recursion_info *ri; |
recursion_info *ri; |
| 1673 |
int recno; |
int recno; |
| 1674 |
|
|
| 1675 |
callpat = md->start_code + GET(ecode, 1); |
callpat = md->start_code + GET(ecode, 1); |
| 1676 |
recno = (callpat == md->start_code)? 0 : |
recno = (callpat == md->start_code)? 0 : |
| 1677 |
GET2(callpat, 1 + LINK_SIZE); |
GET2(callpat, 1 + LINK_SIZE); |
| 1678 |
|
|
| 1679 |
/* Check for repeating a recursion without advancing the subject pointer. |
/* Check for repeating a recursion without advancing the subject pointer. |
| 1680 |
This should catch convoluted mutual recursions. (Some simple cases are |
This should catch convoluted mutual recursions. (Some simple cases are |
| 1681 |
caught at compile time.) */ |
caught at compile time.) */ |
| 1682 |
|
|
| 1683 |
for (ri = md->recursive; ri != NULL; ri = ri->prevrec) |
for (ri = md->recursive; ri != NULL; ri = ri->prevrec) |
| 1684 |
if (recno == ri->group_num && eptr == ri->subject_position) |
if (recno == ri->group_num && eptr == ri->subject_position) |
| 1685 |
RRETURN(PCRE_ERROR_RECURSELOOP); |
RRETURN(PCRE_ERROR_RECURSELOOP); |
| 1686 |
|
|
| 1687 |
/* Add to "recursing stack" */ |
/* Add to "recursing stack" */ |
| 1722 |
md, eptrb, RM6); |
md, eptrb, RM6); |
| 1723 |
memcpy(md->offset_vector, new_recursive.offset_save, |
memcpy(md->offset_vector, new_recursive.offset_save, |
| 1724 |
new_recursive.saved_max * sizeof(int)); |
new_recursive.saved_max * sizeof(int)); |
| 1725 |
|
md->recursive = new_recursive.prevrec; |
| 1726 |
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
if (rrc == MATCH_MATCH || rrc == MATCH_ACCEPT) |
| 1727 |
{ |
{ |
| 1728 |
DPRINTF(("Recursion matched\n")); |
DPRINTF(("Recursion matched\n")); |
|
md->recursive = new_recursive.prevrec; |
|
| 1729 |
if (new_recursive.offset_save != stacksave) |
if (new_recursive.offset_save != stacksave) |
| 1730 |
(pcre_free)(new_recursive.offset_save); |
(pcre_free)(new_recursive.offset_save); |
| 1731 |
|
|
| 1737 |
mstart = md->start_match_ptr; |
mstart = md->start_match_ptr; |
| 1738 |
goto RECURSION_MATCHED; /* Exit loop; end processing */ |
goto RECURSION_MATCHED; /* Exit loop; end processing */ |
| 1739 |
} |
} |
| 1740 |
else if (rrc != MATCH_NOMATCH && |
|
| 1741 |
(rrc != MATCH_THEN || md->start_match_ptr != ecode)) |
/* PCRE does not allow THEN to escape beyond a recursion; it is treated |
| 1742 |
|
as NOMATCH. */ |
| 1743 |
|
|
| 1744 |
|
else if (rrc != MATCH_NOMATCH && rrc != MATCH_THEN) |
| 1745 |
{ |
{ |
| 1746 |
DPRINTF(("Recursion gave error %d\n", rrc)); |
DPRINTF(("Recursion gave error %d\n", rrc)); |
| 1747 |
if (new_recursive.offset_save != stacksave) |
if (new_recursive.offset_save != stacksave) |
| 1827 |
} |
} |
| 1828 |
else saved_eptr = NULL; |
else saved_eptr = NULL; |
| 1829 |
|
|
| 1830 |
/* If we are at the end of an assertion group, stop matching and return |
/* If we are at the end of an assertion group or a non-capturing atomic |
| 1831 |
MATCH_MATCH, but record the current high water mark for use by positive |
group, stop matching and return MATCH_MATCH, but record the current high |
| 1832 |
assertions. We also need to record the match start in case it was changed |
water mark for use by positive assertions. We also need to record the match |
| 1833 |
by \K. */ |
start in case it was changed by \K. */ |
| 1834 |
|
|
| 1835 |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || |
if ((*prev >= OP_ASSERT && *prev <= OP_ASSERTBACK_NOT) || |
| 1836 |
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT) |
*prev == OP_ONCE_NC) |
| 1837 |
{ |
{ |
| 1838 |
md->end_match_ptr = eptr; /* For ONCE */ |
md->end_match_ptr = eptr; /* For ONCE_NC */ |
| 1839 |
md->end_offset_top = offset_top; |
md->end_offset_top = offset_top; |
| 1840 |
md->start_match_ptr = mstart; |
md->start_match_ptr = mstart; |
| 1841 |
MRRETURN(MATCH_MATCH); /* Sets md->mark */ |
MRRETURN(MATCH_MATCH); /* Sets md->mark */ |
| 1903 |
/* For an ordinary non-repeating ket, just continue at this level. This |
/* For an ordinary non-repeating ket, just continue at this level. This |
| 1904 |
also happens for a repeating ket if no characters were matched in the |
also happens for a repeating ket if no characters were matched in the |
| 1905 |
group. This is the forcible breaking of infinite loops as implemented in |
group. This is the forcible breaking of infinite loops as implemented in |
| 1906 |
Perl 5.005. For a non-repeating atomic group, establish a backup point by |
Perl 5.005. For a non-repeating atomic group that includes captures, |
| 1907 |
processing the rest of the pattern at a lower level. If this results in a |
establish a backup point by processing the rest of the pattern at a lower |
| 1908 |
NOMATCH return, pass MATCH_ONCE back to the original OP_ONCE level, thereby |
level. If this results in a NOMATCH return, pass MATCH_ONCE back to the |
| 1909 |
bypassing intermediate backup points, but resetting any captures that |
original OP_ONCE level, thereby bypassing intermediate backup points, but |
| 1910 |
happened along the way. */ |
resetting any captures that happened along the way. */ |
| 1911 |
|
|
| 1912 |
if (*ecode == OP_KET || eptr == saved_eptr) |
if (*ecode == OP_KET || eptr == saved_eptr) |
| 1913 |
{ |
{ |
| 2183 |
/* Fall through */ |
/* Fall through */ |
| 2184 |
|
|
| 2185 |
case OP_ALLANY: |
case OP_ALLANY: |
| 2186 |
if (eptr++ >= md->end_subject) |
if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ |
| 2187 |
{ |
{ /* not be updated before SCHECK_PARTIAL. */ |
| 2188 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
| 2189 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 2190 |
} |
} |
| 2191 |
|
eptr++; |
| 2192 |
if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 2193 |
ecode++; |
ecode++; |
| 2194 |
break; |
break; |
| 2197 |
any byte, even newline, independent of the setting of PCRE_DOTALL. */ |
any byte, even newline, independent of the setting of PCRE_DOTALL. */ |
| 2198 |
|
|
| 2199 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 2200 |
if (eptr++ >= md->end_subject) |
if (eptr >= md->end_subject) /* DO NOT merge the eptr++ here; it must */ |
| 2201 |
{ |
{ /* not be updated before SCHECK_PARTIAL. */ |
| 2202 |
SCHECK_PARTIAL(); |
SCHECK_PARTIAL(); |
| 2203 |
MRRETURN(MATCH_NOMATCH); |
MRRETURN(MATCH_NOMATCH); |
| 2204 |
} |
} |
| 2205 |
|
eptr++; |
| 2206 |
ecode++; |
ecode++; |
| 2207 |
break; |
break; |
| 2208 |
|
|
| 5351 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 5352 |
} |
} |
| 5353 |
} |
} |
| 5354 |
else eptr = md->end_subject; /* Unlimited UTF-8 repeat */ |
else |
| 5355 |
|
{ |
| 5356 |
|
eptr = md->end_subject; /* Unlimited UTF-8 repeat */ |
| 5357 |
|
SCHECK_PARTIAL(); |
| 5358 |
|
} |
| 5359 |
break; |
break; |
| 5360 |
|
|
| 5361 |
/* The byte case is the same as non-UTF8 */ |
/* The byte case is the same as non-UTF8 */ |
| 5828 |
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17) |
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17) |
| 5829 |
LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33) |
LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33) |
| 5830 |
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) |
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) |
| 5831 |
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) |
LBL(53) LBL(54) LBL(55) LBL(56) LBL(57) LBL(58) LBL(63) LBL(64) |
| 5832 |
|
LBL(65) LBL(66) |
| 5833 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 5834 |
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30) |
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30) |
| 5835 |
LBL(32) LBL(34) LBL(42) LBL(46) |
LBL(32) LBL(34) LBL(42) LBL(46) |
| 5931 |
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, |
PCRE_SPTR subject, int length, int start_offset, int options, int *offsets, |
| 5932 |
int offsetcount) |
int offsetcount) |
| 5933 |
{ |
{ |
| 5934 |
int rc, ocount; |
int rc, ocount, arg_offset_max; |
| 5935 |
int first_byte = -1; |
int first_byte = -1; |
| 5936 |
int req_byte = -1; |
int req_byte = -1; |
| 5937 |
int req_byte2 = -1; |
int req_byte2 = -1; |
| 5967 |
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; |
if (offsetcount < 0) return PCRE_ERROR_BADCOUNT; |
| 5968 |
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; |
if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET; |
| 5969 |
|
|
| 5970 |
/* This information is for finding all the numbers associated with a given |
/* These two settings are used in the code for checking a UTF-8 string that |
| 5971 |
name, for condition testing. */ |
follows immediately afterwards. Other values in the md block are used only |
| 5972 |
|
during "normal" pcre_exec() processing, not when the JIT support is in use, |
| 5973 |
|
so they are set up later. */ |
| 5974 |
|
|
| 5975 |
|
utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0; |
| 5976 |
|
md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 : |
| 5977 |
|
((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0; |
| 5978 |
|
|
| 5979 |
|
/* Check a UTF-8 string if required. Pass back the character offset and error |
| 5980 |
|
code for an invalid string if a results vector is available. */ |
| 5981 |
|
|
| 5982 |
|
#ifdef SUPPORT_UTF8 |
| 5983 |
|
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
| 5984 |
|
{ |
| 5985 |
|
int erroroffset; |
| 5986 |
|
int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset); |
| 5987 |
|
if (errorcode != 0) |
| 5988 |
|
{ |
| 5989 |
|
if (offsetcount >= 2) |
| 5990 |
|
{ |
| 5991 |
|
offsets[0] = erroroffset; |
| 5992 |
|
offsets[1] = errorcode; |
| 5993 |
|
} |
| 5994 |
|
return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)? |
| 5995 |
|
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
| 5996 |
|
} |
| 5997 |
|
|
| 5998 |
|
/* Check that a start_offset points to the start of a UTF-8 character. */ |
| 5999 |
|
if (start_offset > 0 && start_offset < length && |
| 6000 |
|
(((USPTR)subject)[start_offset] & 0xc0) == 0x80) |
| 6001 |
|
return PCRE_ERROR_BADUTF8_OFFSET; |
| 6002 |
|
} |
| 6003 |
|
#endif |
| 6004 |
|
|
| 6005 |
|
/* If the pattern was successfully studied with JIT support, run the JIT |
| 6006 |
|
executable instead of the rest of this function. Most options must be set at |
| 6007 |
|
compile time for the JIT code to be usable. Fall back to the normal code path if |
| 6008 |
|
an unsupported flag is set. In particular, JIT does not support partial |
| 6009 |
|
matching. */ |
| 6010 |
|
|
| 6011 |
|
#ifdef SUPPORT_JIT |
| 6012 |
|
if (extra_data != NULL |
| 6013 |
|
&& (extra_data->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 |
| 6014 |
|
&& extra_data->executable_jit != NULL |
| 6015 |
|
&& (extra_data->flags & PCRE_EXTRA_TABLES) == 0 |
| 6016 |
|
&& (options & ~(PCRE_NO_UTF8_CHECK | PCRE_NOTBOL | PCRE_NOTEOL | |
| 6017 |
|
PCRE_NOTEMPTY | PCRE_NOTEMPTY_ATSTART)) == 0) |
| 6018 |
|
return _pcre_jit_exec(re, extra_data->executable_jit, subject, length, |
| 6019 |
|
start_offset, options, ((extra_data->flags & PCRE_EXTRA_MATCH_LIMIT) == 0) |
| 6020 |
|
? MATCH_LIMIT : extra_data->match_limit, offsets, offsetcount); |
| 6021 |
|
#endif |
| 6022 |
|
|
| 6023 |
|
/* Carry on with non-JIT matching. This information is for finding all the |
| 6024 |
|
numbers associated with a given name, for condition testing. */ |
| 6025 |
|
|
| 6026 |
md->name_table = (uschar *)re + re->name_table_offset; |
md->name_table = (uschar *)re + re->name_table_offset; |
| 6027 |
md->name_count = re->name_count; |
md->name_count = re->name_count; |
| 6088 |
end_subject = md->end_subject; |
end_subject = md->end_subject; |
| 6089 |
|
|
| 6090 |
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
|
utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0; |
|
| 6091 |
md->use_ucp = (re->options & PCRE_UCP) != 0; |
md->use_ucp = (re->options & PCRE_UCP) != 0; |
| 6092 |
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
| 6093 |
|
|
| 6098 |
md->noteol = (options & PCRE_NOTEOL) != 0; |
md->noteol = (options & PCRE_NOTEOL) != 0; |
| 6099 |
md->notempty = (options & PCRE_NOTEMPTY) != 0; |
md->notempty = (options & PCRE_NOTEMPTY) != 0; |
| 6100 |
md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; |
md->notempty_atstart = (options & PCRE_NOTEMPTY_ATSTART) != 0; |
|
md->partial = ((options & PCRE_PARTIAL_HARD) != 0)? 2 : |
|
|
((options & PCRE_PARTIAL_SOFT) != 0)? 1 : 0; |
|
|
|
|
| 6101 |
|
|
| 6102 |
md->hitend = FALSE; |
md->hitend = FALSE; |
| 6103 |
md->mark = NULL; /* In case never set */ |
md->mark = NULL; /* In case never set */ |
| 6104 |
|
|
| 6105 |
md->recursive = NULL; /* No recursion at top level */ |
md->recursive = NULL; /* No recursion at top level */ |
| 6106 |
|
md->hasthen = (re->flags & PCRE_HASTHEN) != 0; |
| 6107 |
|
|
| 6108 |
md->lcc = tables + lcc_offset; |
md->lcc = tables + lcc_offset; |
| 6109 |
md->ctypes = tables + ctypes_offset; |
md->ctypes = tables + ctypes_offset; |
| 6181 |
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0) |
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0) |
| 6182 |
return PCRE_ERROR_BADPARTIAL; |
return PCRE_ERROR_BADPARTIAL; |
| 6183 |
|
|
|
/* Check a UTF-8 string if required. Pass back the character offset and error |
|
|
code for an invalid string if a results vector is available. */ |
|
|
|
|
|
#ifdef SUPPORT_UTF8 |
|
|
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
|
|
{ |
|
|
int erroroffset; |
|
|
int errorcode = _pcre_valid_utf8((USPTR)subject, length, &erroroffset); |
|
|
if (errorcode != 0) |
|
|
{ |
|
|
if (offsetcount >= 2) |
|
|
{ |
|
|
offsets[0] = erroroffset; |
|
|
offsets[1] = errorcode; |
|
|
} |
|
|
return (errorcode <= PCRE_UTF8_ERR5 && md->partial > 1)? |
|
|
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
|
|
} |
|
|
|
|
|
/* Check that a start_offset points to the start of a UTF-8 character. */ |
|
|
|
|
|
if (start_offset > 0 && start_offset < length && |
|
|
(((USPTR)subject)[start_offset] & 0xc0) == 0x80) |
|
|
return PCRE_ERROR_BADUTF8_OFFSET; |
|
|
} |
|
|
#endif |
|
|
|
|
| 6184 |
/* If the expression has got more back references than the offsets supplied can |
/* If the expression has got more back references than the offsets supplied can |
| 6185 |
hold, we get a temporary chunk of working store to use during the matching. |
hold, we get a temporary chunk of working store to use during the matching. |
| 6186 |
Otherwise, we can use the vector supplied, rounding down its size to a multiple |
Otherwise, we can use the vector supplied, rounding down its size to a multiple |
| 6187 |
of 3. */ |
of 3. */ |
| 6188 |
|
|
| 6189 |
ocount = offsetcount - (offsetcount % 3); |
ocount = offsetcount - (offsetcount % 3); |
| 6190 |
|
arg_offset_max = (2*ocount)/3; |
| 6191 |
|
|
| 6192 |
if (re->top_backref > 0 && re->top_backref >= ocount/3) |
if (re->top_backref > 0 && re->top_backref >= ocount/3) |
| 6193 |
{ |
{ |
| 6367 |
/* The following two optimizations are disabled for partial matching or if |
/* The following two optimizations are disabled for partial matching or if |
| 6368 |
disabling is explicitly requested. */ |
disabling is explicitly requested. */ |
| 6369 |
|
|
| 6370 |
if ((options & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial) |
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 && !md->partial) |
| 6371 |
{ |
{ |
| 6372 |
/* If the pattern was studied, a minimum subject length may be set. This is |
/* If the pattern was studied, a minimum subject length may be set. This is |
| 6373 |
a lower bound; no actual string of that length may actually match the |
a lower bound; no actual string of that length may actually match the |
| 6562 |
{ |
{ |
| 6563 |
if (using_temporary_offsets) |
if (using_temporary_offsets) |
| 6564 |
{ |
{ |
| 6565 |
if (offsetcount >= 4) |
if (arg_offset_max >= 4) |
| 6566 |
{ |
{ |
| 6567 |
memcpy(offsets + 2, md->offset_vector + 2, |
memcpy(offsets + 2, md->offset_vector + 2, |
| 6568 |
(offsetcount - 2) * sizeof(int)); |
(arg_offset_max - 2) * sizeof(int)); |
| 6569 |
DPRINTF(("Copied offsets from temporary memory\n")); |
DPRINTF(("Copied offsets from temporary memory\n")); |
| 6570 |
} |
} |
| 6571 |
if (md->end_offset_top > offsetcount) md->offset_overflow = TRUE; |
if (md->end_offset_top > arg_offset_max) md->offset_overflow = TRUE; |
| 6572 |
DPRINTF(("Freeing temporary memory\n")); |
DPRINTF(("Freeing temporary memory\n")); |
| 6573 |
(pcre_free)(md->offset_vector); |
(pcre_free)(md->offset_vector); |
| 6574 |
} |
} |
| 6575 |
|
|
| 6576 |
/* Set the return code to the number of captured strings, or 0 if there are |
/* Set the return code to the number of captured strings, or 0 if there were |
| 6577 |
too many to fit into the vector. */ |
too many to fit into the vector. */ |
| 6578 |
|
|
| 6579 |
rc = md->offset_overflow? 0 : md->end_offset_top/2; |
rc = (md->offset_overflow && md->end_offset_top >= arg_offset_max)? |
| 6580 |
|
0 : md->end_offset_top/2; |
| 6581 |
|
|
| 6582 |
/* If there is space in the offset vector, set any unused pairs at the end of |
/* If there is space in the offset vector, set any unused pairs at the end of |
| 6583 |
the pattern to -1 for backwards compatibility. It is documented that this |
the pattern to -1 for backwards compatibility. It is documented that this |
| 6584 |
happens. In earlier versions, the whole set of potential capturing offsets |
happens. In earlier versions, the whole set of potential capturing offsets |
| 6585 |
was set to -1 each time round the loop, but this is handled differently now. |
was set to -1 each time round the loop, but this is handled differently now. |
| 6586 |
"Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only |
"Gaps" are set to -1 dynamically instead (this fixes a bug). Thus, it is only |
| 6587 |
those at the end that need unsetting here. We can't just unset them all at |
those at the end that need unsetting here. We can't just unset them all at |
| 6588 |
the start of the whole thing because they may get set in one branch that is |
the start of the whole thing because they may get set in one branch that is |
| 6589 |
not the final matching branch. */ |
not the final matching branch. */ |