| 6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
| 7 |
|
|
| 8 |
Written by Philip Hazel |
Written by Philip Hazel |
| 9 |
Copyright (c) 1997-2007 University of Cambridge |
Copyright (c) 1997-2008 University of Cambridge |
| 10 |
|
|
| 11 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 12 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 43 |
possible. There are also some static supporting functions. */ |
possible. There are also some static supporting functions. */ |
| 44 |
|
|
| 45 |
#ifdef HAVE_CONFIG_H |
#ifdef HAVE_CONFIG_H |
| 46 |
#include <config.h> |
#include "config.h" |
| 47 |
#endif |
#endif |
| 48 |
|
|
| 49 |
#define NLBLOCK md /* Block containing newline information */ |
#define NLBLOCK md /* Block containing newline information */ |
| 1148 |
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
do ecode += GET(ecode,1); while (*ecode == OP_ALT); |
| 1149 |
break; |
break; |
| 1150 |
|
|
| 1151 |
/* BRAZERO and BRAMINZERO occur just before a bracket group, indicating |
/* BRAZERO, BRAMINZERO and SKIPZERO occur just before a bracket group, |
| 1152 |
that it may occur zero times. It may repeat infinitely, or not at all - |
indicating that it may occur zero times. It may repeat infinitely, or not |
| 1153 |
i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper |
at all - i.e. it could be ()* or ()? or even (){0} in the pattern. Brackets |
| 1154 |
repeat limits are compiled as a number of copies, with the optional ones |
with fixed upper repeat limits are compiled as a number of copies, with the |
| 1155 |
preceded by BRAZERO or BRAMINZERO. */ |
optional ones preceded by BRAZERO or BRAMINZERO. */ |
| 1156 |
|
|
| 1157 |
case OP_BRAZERO: |
case OP_BRAZERO: |
| 1158 |
{ |
{ |
| 1174 |
} |
} |
| 1175 |
break; |
break; |
| 1176 |
|
|
| 1177 |
|
case OP_SKIPZERO: |
| 1178 |
|
{ |
| 1179 |
|
next = ecode+1; |
| 1180 |
|
do next += GET(next,1); while (*next == OP_ALT); |
| 1181 |
|
ecode = next + 1 + LINK_SIZE; |
| 1182 |
|
} |
| 1183 |
|
break; |
| 1184 |
|
|
| 1185 |
/* End of a group, repeated or non-repeating. */ |
/* End of a group, repeated or non-repeating. */ |
| 1186 |
|
|
| 1187 |
case OP_KET: |
case OP_KET: |
| 1429 |
/* Match a single character type; inline for speed */ |
/* Match a single character type; inline for speed */ |
| 1430 |
|
|
| 1431 |
case OP_ANY: |
case OP_ANY: |
| 1432 |
if ((ims & PCRE_DOTALL) == 0) |
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); |
| 1433 |
{ |
/* Fall through */ |
| 1434 |
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); |
|
| 1435 |
} |
case OP_ALLANY: |
| 1436 |
if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); |
if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 1437 |
if (utf8) |
if (utf8) while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
|
| 1438 |
ecode++; |
ecode++; |
| 1439 |
break; |
break; |
| 1440 |
|
|
| 1533 |
case 0x000d: |
case 0x000d: |
| 1534 |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
| 1535 |
break; |
break; |
| 1536 |
|
|
| 1537 |
case 0x000a: |
case 0x000a: |
| 1538 |
|
break; |
| 1539 |
|
|
| 1540 |
case 0x000b: |
case 0x000b: |
| 1541 |
case 0x000c: |
case 0x000c: |
| 1542 |
case 0x0085: |
case 0x0085: |
| 1543 |
case 0x2028: |
case 0x2028: |
| 1544 |
case 0x2029: |
case 0x2029: |
| 1545 |
|
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
| 1546 |
break; |
break; |
| 1547 |
} |
} |
| 1548 |
ecode++; |
ecode++; |
| 1730 |
case OP_REF: |
case OP_REF: |
| 1731 |
{ |
{ |
| 1732 |
offset = GET2(ecode, 1) << 1; /* Doubled ref number */ |
offset = GET2(ecode, 1) << 1; /* Doubled ref number */ |
| 1733 |
ecode += 3; /* Advance past item */ |
ecode += 3; |
| 1734 |
|
|
| 1735 |
/* If the reference is unset, set the length to be longer than the amount |
/* If the reference is unset, there are two possibilities: |
| 1736 |
of subject left; this ensures that every attempt at a match fails. We |
|
| 1737 |
can't just fail here, because of the possibility of quantifiers with zero |
(a) In the default, Perl-compatible state, set the length to be longer |
| 1738 |
minima. */ |
than the amount of subject left; this ensures that every attempt at a |
| 1739 |
|
match fails. We can't just fail here, because of the possibility of |
| 1740 |
length = (offset >= offset_top || md->offset_vector[offset] < 0)? |
quantifiers with zero minima. |
| 1741 |
md->end_subject - eptr + 1 : |
|
| 1742 |
md->offset_vector[offset+1] - md->offset_vector[offset]; |
(b) If the JavaScript compatibility flag is set, set the length to zero |
| 1743 |
|
so that the back reference matches an empty string. |
| 1744 |
|
|
| 1745 |
|
Otherwise, set the length to the length of what was matched by the |
| 1746 |
|
referenced subpattern. */ |
| 1747 |
|
|
| 1748 |
|
if (offset >= offset_top || md->offset_vector[offset] < 0) |
| 1749 |
|
length = (md->jscript_compat)? 0 : md->end_subject - eptr + 1; |
| 1750 |
|
else |
| 1751 |
|
length = md->offset_vector[offset+1] - md->offset_vector[offset]; |
| 1752 |
|
|
| 1753 |
/* Set up for repetition, or handle the non-repeated case */ |
/* Set up for repetition, or handle the non-repeated case */ |
| 1754 |
|
|
| 2951 |
case OP_ANY: |
case OP_ANY: |
| 2952 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 2953 |
{ |
{ |
| 2954 |
if (eptr >= md->end_subject || |
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) |
|
((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr))) |
|
| 2955 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
| 2956 |
eptr++; |
eptr++; |
| 2957 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 2958 |
} |
} |
| 2959 |
break; |
break; |
| 2960 |
|
|
| 2961 |
|
case OP_ALLANY: |
| 2962 |
|
for (i = 1; i <= min; i++) |
| 2963 |
|
{ |
| 2964 |
|
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 2965 |
|
eptr++; |
| 2966 |
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 2967 |
|
} |
| 2968 |
|
break; |
| 2969 |
|
|
| 2970 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 2971 |
eptr += min; |
eptr += min; |
| 2972 |
break; |
break; |
| 2982 |
case 0x000d: |
case 0x000d: |
| 2983 |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
| 2984 |
break; |
break; |
| 2985 |
|
|
| 2986 |
case 0x000a: |
case 0x000a: |
| 2987 |
|
break; |
| 2988 |
|
|
| 2989 |
case 0x000b: |
case 0x000b: |
| 2990 |
case 0x000c: |
case 0x000c: |
| 2991 |
case 0x0085: |
case 0x0085: |
| 2992 |
case 0x2028: |
case 0x2028: |
| 2993 |
case 0x2029: |
case 0x2029: |
| 2994 |
|
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
| 2995 |
break; |
break; |
| 2996 |
} |
} |
| 2997 |
} |
} |
| 3175 |
switch(ctype) |
switch(ctype) |
| 3176 |
{ |
{ |
| 3177 |
case OP_ANY: |
case OP_ANY: |
| 3178 |
if ((ims & PCRE_DOTALL) == 0) |
for (i = 1; i <= min; i++) |
| 3179 |
{ |
{ |
| 3180 |
for (i = 1; i <= min; i++) |
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); |
| 3181 |
{ |
eptr++; |
|
if (IS_NEWLINE(eptr)) RRETURN(MATCH_NOMATCH); |
|
|
eptr++; |
|
|
} |
|
| 3182 |
} |
} |
| 3183 |
else eptr += min; |
break; |
| 3184 |
|
|
| 3185 |
|
case OP_ALLANY: |
| 3186 |
|
eptr += min; |
| 3187 |
break; |
break; |
| 3188 |
|
|
| 3189 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 3204 |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
| 3205 |
break; |
break; |
| 3206 |
case 0x000a: |
case 0x000a: |
| 3207 |
|
break; |
| 3208 |
|
|
| 3209 |
case 0x000b: |
case 0x000b: |
| 3210 |
case 0x000c: |
case 0x000c: |
| 3211 |
case 0x0085: |
case 0x0085: |
| 3212 |
|
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
| 3213 |
break; |
break; |
| 3214 |
} |
} |
| 3215 |
} |
} |
| 3440 |
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42); |
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM42); |
| 3441 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 3442 |
if (fi >= max || eptr >= md->end_subject || |
if (fi >= max || eptr >= md->end_subject || |
| 3443 |
(ctype == OP_ANY && (ims & PCRE_DOTALL) == 0 && |
(ctype == OP_ANY && IS_NEWLINE(eptr))) |
|
IS_NEWLINE(eptr))) |
|
| 3444 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
| 3445 |
|
|
| 3446 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
| 3447 |
switch(ctype) |
switch(ctype) |
| 3448 |
{ |
{ |
| 3449 |
case OP_ANY: /* This is the DOTALL case */ |
case OP_ANY: /* This is the non-NL case */ |
| 3450 |
break; |
case OP_ALLANY: |
|
|
|
| 3451 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 3452 |
break; |
break; |
| 3453 |
|
|
| 3459 |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
| 3460 |
break; |
break; |
| 3461 |
case 0x000a: |
case 0x000a: |
| 3462 |
|
break; |
| 3463 |
|
|
| 3464 |
case 0x000b: |
case 0x000b: |
| 3465 |
case 0x000c: |
case 0x000c: |
| 3466 |
case 0x0085: |
case 0x0085: |
| 3467 |
case 0x2028: |
case 0x2028: |
| 3468 |
case 0x2029: |
case 0x2029: |
| 3469 |
|
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
| 3470 |
break; |
break; |
| 3471 |
} |
} |
| 3472 |
break; |
break; |
| 3599 |
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43); |
RMATCH(eptr, ecode, offset_top, md, ims, eptrb, 0, RM43); |
| 3600 |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 3601 |
if (fi >= max || eptr >= md->end_subject || |
if (fi >= max || eptr >= md->end_subject || |
| 3602 |
((ims & PCRE_DOTALL) == 0 && IS_NEWLINE(eptr))) |
(ctype == OP_ANY && IS_NEWLINE(eptr))) |
| 3603 |
RRETURN(MATCH_NOMATCH); |
RRETURN(MATCH_NOMATCH); |
| 3604 |
|
|
| 3605 |
c = *eptr++; |
c = *eptr++; |
| 3606 |
switch(ctype) |
switch(ctype) |
| 3607 |
{ |
{ |
| 3608 |
case OP_ANY: /* This is the DOTALL case */ |
case OP_ANY: /* This is the non-NL case */ |
| 3609 |
break; |
case OP_ALLANY: |
|
|
|
| 3610 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 3611 |
break; |
break; |
| 3612 |
|
|
| 3617 |
case 0x000d: |
case 0x000d: |
| 3618 |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
if (eptr < md->end_subject && *eptr == 0x0a) eptr++; |
| 3619 |
break; |
break; |
| 3620 |
|
|
| 3621 |
case 0x000a: |
case 0x000a: |
| 3622 |
|
break; |
| 3623 |
|
|
| 3624 |
case 0x000b: |
case 0x000b: |
| 3625 |
case 0x000c: |
case 0x000c: |
| 3626 |
case 0x0085: |
case 0x0085: |
| 3627 |
|
if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); |
| 3628 |
break; |
break; |
| 3629 |
} |
} |
| 3630 |
break; |
break; |
| 3860 |
case OP_ANY: |
case OP_ANY: |
| 3861 |
if (max < INT_MAX) |
if (max < INT_MAX) |
| 3862 |
{ |
{ |
| 3863 |
if ((ims & PCRE_DOTALL) == 0) |
for (i = min; i < max; i++) |
|
{ |
|
|
for (i = min; i < max; i++) |
|
|
{ |
|
|
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; |
|
|
eptr++; |
|
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
|
|
} |
|
|
} |
|
|
else |
|
| 3864 |
{ |
{ |
| 3865 |
for (i = min; i < max; i++) |
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; |
| 3866 |
{ |
eptr++; |
| 3867 |
if (eptr >= md->end_subject) break; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
|
eptr++; |
|
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
|
|
} |
|
| 3868 |
} |
} |
| 3869 |
} |
} |
| 3870 |
|
|
| 3872 |
|
|
| 3873 |
else |
else |
| 3874 |
{ |
{ |
| 3875 |
if ((ims & PCRE_DOTALL) == 0) |
for (i = min; i < max; i++) |
| 3876 |
{ |
{ |
| 3877 |
for (i = min; i < max; i++) |
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; |
| 3878 |
{ |
eptr++; |
| 3879 |
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
|
eptr++; |
|
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
|
|
} |
|
| 3880 |
} |
} |
| 3881 |
else |
} |
| 3882 |
|
break; |
| 3883 |
|
|
| 3884 |
|
case OP_ALLANY: |
| 3885 |
|
if (max < INT_MAX) |
| 3886 |
|
{ |
| 3887 |
|
for (i = min; i < max; i++) |
| 3888 |
{ |
{ |
| 3889 |
eptr = md->end_subject; |
if (eptr >= md->end_subject) break; |
| 3890 |
|
eptr++; |
| 3891 |
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 3892 |
} |
} |
| 3893 |
} |
} |
| 3894 |
|
else eptr = md->end_subject; /* Unlimited UTF-8 repeat */ |
| 3895 |
break; |
break; |
| 3896 |
|
|
| 3897 |
/* The byte case is the same as non-UTF8 */ |
/* The byte case is the same as non-UTF8 */ |
| 3916 |
} |
} |
| 3917 |
else |
else |
| 3918 |
{ |
{ |
| 3919 |
if (c != 0x000a && c != 0x000b && c != 0x000c && |
if (c != 0x000a && |
| 3920 |
c != 0x0085 && c != 0x2028 && c != 0x2029) |
(md->bsr_anycrlf || |
| 3921 |
|
(c != 0x000b && c != 0x000c && |
| 3922 |
|
c != 0x0085 && c != 0x2028 && c != 0x2029))) |
| 3923 |
break; |
break; |
| 3924 |
eptr += len; |
eptr += len; |
| 3925 |
} |
} |
| 4079 |
switch(ctype) |
switch(ctype) |
| 4080 |
{ |
{ |
| 4081 |
case OP_ANY: |
case OP_ANY: |
| 4082 |
if ((ims & PCRE_DOTALL) == 0) |
for (i = min; i < max; i++) |
| 4083 |
{ |
{ |
| 4084 |
for (i = min; i < max; i++) |
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; |
| 4085 |
{ |
eptr++; |
|
if (eptr >= md->end_subject || IS_NEWLINE(eptr)) break; |
|
|
eptr++; |
|
|
} |
|
|
break; |
|
| 4086 |
} |
} |
| 4087 |
/* For DOTALL case, fall through and treat as \C */ |
break; |
| 4088 |
|
|
| 4089 |
|
case OP_ALLANY: |
| 4090 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 4091 |
c = max - min; |
c = max - min; |
| 4092 |
if (c > (unsigned int)(md->end_subject - eptr)) |
if (c > (unsigned int)(md->end_subject - eptr)) |
| 4106 |
} |
} |
| 4107 |
else |
else |
| 4108 |
{ |
{ |
| 4109 |
if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085) |
if (c != 0x000a && |
| 4110 |
|
(md->bsr_anycrlf || |
| 4111 |
|
(c != 0x000b && c != 0x000c && c != 0x0085))) |
| 4112 |
break; |
break; |
| 4113 |
eptr++; |
eptr++; |
| 4114 |
} |
} |
| 4258 |
switch (frame->Xwhere) |
switch (frame->Xwhere) |
| 4259 |
{ |
{ |
| 4260 |
LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8) |
LBL( 1) LBL( 2) LBL( 3) LBL( 4) LBL( 5) LBL( 6) LBL( 7) LBL( 8) |
| 4261 |
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(16) |
LBL( 9) LBL(10) LBL(11) LBL(12) LBL(13) LBL(14) LBL(15) LBL(17) |
| 4262 |
LBL(17) LBL(18) LBL(19) LBL(20) LBL(21) LBL(22) LBL(23) LBL(24) |
LBL(19) LBL(24) LBL(25) LBL(26) LBL(27) LBL(29) LBL(31) LBL(33) |
| 4263 |
LBL(25) LBL(26) LBL(27) LBL(28) LBL(29) LBL(30) LBL(31) LBL(32) |
LBL(35) LBL(43) LBL(47) LBL(48) LBL(49) LBL(50) LBL(51) LBL(52) |
| 4264 |
LBL(33) LBL(34) LBL(35) LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) |
LBL(53) LBL(54) |
| 4265 |
LBL(41) LBL(42) LBL(43) LBL(44) LBL(45) LBL(46) LBL(47) LBL(48) |
#ifdef SUPPORT_UTF8 |
| 4266 |
LBL(49) LBL(50) LBL(51) LBL(52) LBL(53) LBL(54) |
LBL(16) LBL(18) LBL(20) LBL(21) LBL(22) LBL(23) LBL(28) LBL(30) |
| 4267 |
|
LBL(32) LBL(34) LBL(42) LBL(46) |
| 4268 |
|
#ifdef SUPPORT_UCP |
| 4269 |
|
LBL(36) LBL(37) LBL(38) LBL(39) LBL(40) LBL(41) LBL(44) LBL(45) |
| 4270 |
|
#endif /* SUPPORT_UCP */ |
| 4271 |
|
#endif /* SUPPORT_UTF8 */ |
| 4272 |
default: |
default: |
| 4273 |
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); |
DPRINTF(("jump error in pcre match: label %d non-existent\n", frame->Xwhere)); |
| 4274 |
return PCRE_ERROR_INTERNAL; |
return PCRE_ERROR_INTERNAL; |
| 4447 |
/* Set up other data */ |
/* Set up other data */ |
| 4448 |
|
|
| 4449 |
anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
| 4450 |
startline = (re->options & PCRE_STARTLINE) != 0; |
startline = (re->flags & PCRE_STARTLINE) != 0; |
| 4451 |
firstline = (re->options & PCRE_FIRSTLINE) != 0; |
firstline = (re->options & PCRE_FIRSTLINE) != 0; |
| 4452 |
|
|
| 4453 |
/* The code starts after the real_pcre block and the capture name table. */ |
/* The code starts after the real_pcre block and the capture name table. */ |
| 4462 |
|
|
| 4463 |
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
md->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
| 4464 |
utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0; |
utf8 = md->utf8 = (re->options & PCRE_UTF8) != 0; |
| 4465 |
|
md->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0; |
| 4466 |
|
|
| 4467 |
md->notbol = (options & PCRE_NOTBOL) != 0; |
md->notbol = (options & PCRE_NOTBOL) != 0; |
| 4468 |
md->noteol = (options & PCRE_NOTEOL) != 0; |
md->noteol = (options & PCRE_NOTEOL) != 0; |
| 4475 |
md->lcc = tables + lcc_offset; |
md->lcc = tables + lcc_offset; |
| 4476 |
md->ctypes = tables + ctypes_offset; |
md->ctypes = tables + ctypes_offset; |
| 4477 |
|
|
| 4478 |
|
/* Handle different \R options. */ |
| 4479 |
|
|
| 4480 |
|
switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) |
| 4481 |
|
{ |
| 4482 |
|
case 0: |
| 4483 |
|
if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0) |
| 4484 |
|
md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0; |
| 4485 |
|
else |
| 4486 |
|
#ifdef BSR_ANYCRLF |
| 4487 |
|
md->bsr_anycrlf = TRUE; |
| 4488 |
|
#else |
| 4489 |
|
md->bsr_anycrlf = FALSE; |
| 4490 |
|
#endif |
| 4491 |
|
break; |
| 4492 |
|
|
| 4493 |
|
case PCRE_BSR_ANYCRLF: |
| 4494 |
|
md->bsr_anycrlf = TRUE; |
| 4495 |
|
break; |
| 4496 |
|
|
| 4497 |
|
case PCRE_BSR_UNICODE: |
| 4498 |
|
md->bsr_anycrlf = FALSE; |
| 4499 |
|
break; |
| 4500 |
|
|
| 4501 |
|
default: return PCRE_ERROR_BADNEWLINE; |
| 4502 |
|
} |
| 4503 |
|
|
| 4504 |
/* Handle different types of newline. The three bits give eight cases. If |
/* Handle different types of newline. The three bits give eight cases. If |
| 4505 |
nothing is set at run time, whatever was used at compile time applies. */ |
nothing is set at run time, whatever was used at compile time applies. */ |
| 4506 |
|
|
| 4507 |
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) & |
switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : |
| 4508 |
PCRE_NEWLINE_BITS) |
(pcre_uint32)options) & PCRE_NEWLINE_BITS) |
| 4509 |
{ |
{ |
| 4510 |
case 0: newline = NEWLINE; break; /* Compile-time default */ |
case 0: newline = NEWLINE; break; /* Compile-time default */ |
| 4511 |
case PCRE_NEWLINE_CR: newline = '\r'; break; |
case PCRE_NEWLINE_CR: newline = '\r'; break; |
| 4544 |
/* Partial matching is supported only for a restricted set of regexes at the |
/* Partial matching is supported only for a restricted set of regexes at the |
| 4545 |
moment. */ |
moment. */ |
| 4546 |
|
|
| 4547 |
if (md->partial && (re->options & PCRE_NOPARTIAL) != 0) |
if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0) |
| 4548 |
return PCRE_ERROR_BADPARTIAL; |
return PCRE_ERROR_BADPARTIAL; |
| 4549 |
|
|
| 4550 |
/* Check a UTF-8 string if required. Unfortunately there's no way of passing |
/* Check a UTF-8 string if required. Unfortunately there's no way of passing |
| 4621 |
|
|
| 4622 |
if (!anchored) |
if (!anchored) |
| 4623 |
{ |
{ |
| 4624 |
if ((re->options & PCRE_FIRSTSET) != 0) |
if ((re->flags & PCRE_FIRSTSET) != 0) |
| 4625 |
{ |
{ |
| 4626 |
first_byte = re->first_byte & 255; |
first_byte = re->first_byte & 255; |
| 4627 |
if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE) |
if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE) |
| 4636 |
/* For anchored or unanchored matches, there may be a "last known required |
/* For anchored or unanchored matches, there may be a "last known required |
| 4637 |
character" set. */ |
character" set. */ |
| 4638 |
|
|
| 4639 |
if ((re->options & PCRE_REQCHSET) != 0) |
if ((re->flags & PCRE_REQCHSET) != 0) |
| 4640 |
{ |
{ |
| 4641 |
req_byte = re->req_byte & 255; |
req_byte = re->req_byte & 255; |
| 4642 |
req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0; |
req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0; |
| 4683 |
if (first_byte_caseless) |
if (first_byte_caseless) |
| 4684 |
while (start_match < end_subject && |
while (start_match < end_subject && |
| 4685 |
md->lcc[*start_match] != first_byte) |
md->lcc[*start_match] != first_byte) |
| 4686 |
start_match++; |
{ NEXTCHAR(start_match); } |
| 4687 |
else |
else |
| 4688 |
while (start_match < end_subject && *start_match != first_byte) |
while (start_match < end_subject && *start_match != first_byte) |
| 4689 |
start_match++; |
{ NEXTCHAR(start_match); } |
| 4690 |
} |
} |
| 4691 |
|
|
| 4692 |
/* Or to just after a linebreak for a multiline match if possible */ |
/* Or to just after a linebreak for a multiline match if possible */ |
| 4696 |
if (start_match > md->start_subject + start_offset) |
if (start_match > md->start_subject + start_offset) |
| 4697 |
{ |
{ |
| 4698 |
while (start_match <= end_subject && !WAS_NEWLINE(start_match)) |
while (start_match <= end_subject && !WAS_NEWLINE(start_match)) |
| 4699 |
start_match++; |
{ NEXTCHAR(start_match); } |
| 4700 |
|
|
| 4701 |
/* If we have just passed a CR and the newline option is ANY or ANYCRLF, |
/* If we have just passed a CR and the newline option is ANY or ANYCRLF, |
| 4702 |
and we are now at a LF, advance the match position by one more character. |
and we are now at a LF, advance the match position by one more character. |
| 4717 |
while (start_match < end_subject) |
while (start_match < end_subject) |
| 4718 |
{ |
{ |
| 4719 |
register unsigned int c = *start_match; |
register unsigned int c = *start_match; |
| 4720 |
if ((start_bits[c/8] & (1 << (c&7))) == 0) start_match++; else break; |
if ((start_bits[c/8] & (1 << (c&7))) == 0) |
| 4721 |
|
{ NEXTCHAR(start_match); } |
| 4722 |
|
else break; |
| 4723 |
} |
} |
| 4724 |
} |
} |
| 4725 |
|
|
| 4853 |
|
|
| 4854 |
if (anchored || start_match > end_subject) break; |
if (anchored || start_match > end_subject) break; |
| 4855 |
|
|
| 4856 |
/* If we have just passed a CR and the newline option is CRLF or ANY or |
/* If we have just passed a CR and we are now at a LF, and the pattern does |
| 4857 |
ANYCRLF, and we are now at a LF, advance the match position by one more |
not contain any explicit matches for \r or \n, and the newline option is CRLF |
| 4858 |
character. */ |
or ANY or ANYCRLF, advance the match position by one more character. */ |
| 4859 |
|
|
| 4860 |
if (start_match[-1] == '\r' && |
if (start_match[-1] == '\r' && |
| 4861 |
(md->nltype == NLTYPE_ANY || |
start_match < end_subject && |
| 4862 |
md->nltype == NLTYPE_ANYCRLF || |
*start_match == '\n' && |
| 4863 |
md->nllen == 2) && |
(re->flags & PCRE_HASCRORLF) == 0 && |
| 4864 |
start_match < end_subject && |
(md->nltype == NLTYPE_ANY || |
| 4865 |
*start_match == '\n') |
md->nltype == NLTYPE_ANYCRLF || |
| 4866 |
|
md->nllen == 2)) |
| 4867 |
start_match++; |
start_match++; |
| 4868 |
|
|
| 4869 |
} /* End of for(;;) "bumpalong" loop */ |
} /* End of for(;;) "bumpalong" loop */ |