| 3 |
*************************************************/ |
*************************************************/ |
| 4 |
|
|
| 5 |
/* PCRE is a library of functions to support regular expressions whose syntax |
/* PCRE is a library of functions to support regular expressions whose syntax |
| 6 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language (but see |
| 7 |
|
below for why this module is different). |
| 8 |
|
|
| 9 |
Written by Philip Hazel |
Written by Philip Hazel |
| 10 |
Copyright (c) 1997-2008 University of Cambridge |
Copyright (c) 1997-2009 University of Cambridge |
| 11 |
|
|
| 12 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 13 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 224 |
rlevel function call recursion level |
rlevel function call recursion level |
| 225 |
recursing regex recursive call level |
recursing regex recursive call level |
| 226 |
|
|
| 227 |
Returns: > 0 => number of match offset pairs placed in offsets |
Returns: > 0 => number of match offset pairs placed in offsets |
| 228 |
= 0 => offsets overflowed; longest matches are present |
= 0 => offsets overflowed; longest matches are present |
| 229 |
-1 => failed to match |
-1 => failed to match |
| 230 |
< -1 => some kind of unexpected problem |
< -1 => some kind of unexpected problem |
| 513 |
const uschar *code; |
const uschar *code; |
| 514 |
int state_offset = current_state->offset; |
int state_offset = current_state->offset; |
| 515 |
int count, codevalue; |
int count, codevalue; |
|
#ifdef SUPPORT_UCP |
|
|
int chartype, script; |
|
|
#endif |
|
| 516 |
|
|
| 517 |
#ifdef DEBUG |
#ifdef DEBUG |
| 518 |
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); |
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); |
| 758 |
if ((md->moptions & PCRE_NOTEOL) == 0) |
if ((md->moptions & PCRE_NOTEOL) == 0) |
| 759 |
{ |
{ |
| 760 |
if (clen == 0 || |
if (clen == 0 || |
| 761 |
(IS_NEWLINE(ptr) && |
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) && |
| 762 |
((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen) |
((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen) |
| 763 |
)) |
)) |
| 764 |
{ ADD_ACTIVE(state_offset + 1, 0); } |
{ ADD_ACTIVE(state_offset + 1, 0); } |
| 823 |
if (clen > 0) |
if (clen > 0) |
| 824 |
{ |
{ |
| 825 |
BOOL OK; |
BOOL OK; |
| 826 |
int category = _pcre_ucp_findprop(c, &chartype, &script); |
const ucd_record * prop = GET_UCD(c); |
| 827 |
switch(code[1]) |
switch(code[1]) |
| 828 |
{ |
{ |
| 829 |
case PT_ANY: |
case PT_ANY: |
| 831 |
break; |
break; |
| 832 |
|
|
| 833 |
case PT_LAMP: |
case PT_LAMP: |
| 834 |
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; |
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt; |
| 835 |
break; |
break; |
| 836 |
|
|
| 837 |
case PT_GC: |
case PT_GC: |
| 838 |
OK = category == code[2]; |
OK = _pcre_ucp_gentype[prop->chartype] == code[2]; |
| 839 |
break; |
break; |
| 840 |
|
|
| 841 |
case PT_PC: |
case PT_PC: |
| 842 |
OK = chartype == code[2]; |
OK = prop->chartype == code[2]; |
| 843 |
break; |
break; |
| 844 |
|
|
| 845 |
case PT_SC: |
case PT_SC: |
| 846 |
OK = script == code[2]; |
OK = prop->script == code[2]; |
| 847 |
break; |
break; |
| 848 |
|
|
| 849 |
/* Should never occur, but keep compilers from grumbling. */ |
/* Should never occur, but keep compilers from grumbling. */ |
| 992 |
if (clen > 0) |
if (clen > 0) |
| 993 |
{ |
{ |
| 994 |
BOOL OK; |
BOOL OK; |
| 995 |
int category = _pcre_ucp_findprop(c, &chartype, &script); |
const ucd_record * prop = GET_UCD(c); |
| 996 |
switch(code[2]) |
switch(code[2]) |
| 997 |
{ |
{ |
| 998 |
case PT_ANY: |
case PT_ANY: |
| 1000 |
break; |
break; |
| 1001 |
|
|
| 1002 |
case PT_LAMP: |
case PT_LAMP: |
| 1003 |
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; |
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt; |
| 1004 |
break; |
break; |
| 1005 |
|
|
| 1006 |
case PT_GC: |
case PT_GC: |
| 1007 |
OK = category == code[3]; |
OK = _pcre_ucp_gentype[prop->chartype] == code[3]; |
| 1008 |
break; |
break; |
| 1009 |
|
|
| 1010 |
case PT_PC: |
case PT_PC: |
| 1011 |
OK = chartype == code[3]; |
OK = prop->chartype == code[3]; |
| 1012 |
break; |
break; |
| 1013 |
|
|
| 1014 |
case PT_SC: |
case PT_SC: |
| 1015 |
OK = script == code[3]; |
OK = prop->script == code[3]; |
| 1016 |
break; |
break; |
| 1017 |
|
|
| 1018 |
/* Should never occur, but keep compilers from grumbling. */ |
/* Should never occur, but keep compilers from grumbling. */ |
| 1041 |
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS: |
case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS: |
| 1042 |
count = current_state->count; /* Already matched */ |
count = current_state->count; /* Already matched */ |
| 1043 |
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } |
if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); } |
| 1044 |
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M) |
if (clen > 0 && UCD_CATEGORY(c) != ucp_M) |
| 1045 |
{ |
{ |
| 1046 |
const uschar *nptr = ptr + clen; |
const uschar *nptr = ptr + clen; |
| 1047 |
int ncount = 0; |
int ncount = 0; |
| 1055 |
int nd; |
int nd; |
| 1056 |
int ndlen = 1; |
int ndlen = 1; |
| 1057 |
GETCHARLEN(nd, nptr, ndlen); |
GETCHARLEN(nd, nptr, ndlen); |
| 1058 |
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break; |
if (UCD_CATEGORY(nd) != ucp_M) break; |
| 1059 |
ncount++; |
ncount++; |
| 1060 |
nptr += ndlen; |
nptr += ndlen; |
| 1061 |
} |
} |
| 1214 |
if (clen > 0) |
if (clen > 0) |
| 1215 |
{ |
{ |
| 1216 |
BOOL OK; |
BOOL OK; |
| 1217 |
int category = _pcre_ucp_findprop(c, &chartype, &script); |
const ucd_record * prop = GET_UCD(c); |
| 1218 |
switch(code[2]) |
switch(code[2]) |
| 1219 |
{ |
{ |
| 1220 |
case PT_ANY: |
case PT_ANY: |
| 1222 |
break; |
break; |
| 1223 |
|
|
| 1224 |
case PT_LAMP: |
case PT_LAMP: |
| 1225 |
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; |
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt; |
| 1226 |
break; |
break; |
| 1227 |
|
|
| 1228 |
case PT_GC: |
case PT_GC: |
| 1229 |
OK = category == code[3]; |
OK = _pcre_ucp_gentype[prop->chartype] == code[3]; |
| 1230 |
break; |
break; |
| 1231 |
|
|
| 1232 |
case PT_PC: |
case PT_PC: |
| 1233 |
OK = chartype == code[3]; |
OK = prop->chartype == code[3]; |
| 1234 |
break; |
break; |
| 1235 |
|
|
| 1236 |
case PT_SC: |
case PT_SC: |
| 1237 |
OK = script == code[3]; |
OK = prop->script == code[3]; |
| 1238 |
break; |
break; |
| 1239 |
|
|
| 1240 |
/* Should never occur, but keep compilers from grumbling. */ |
/* Should never occur, but keep compilers from grumbling. */ |
| 1272 |
QS2: |
QS2: |
| 1273 |
|
|
| 1274 |
ADD_ACTIVE(state_offset + 2, 0); |
ADD_ACTIVE(state_offset + 2, 0); |
| 1275 |
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M) |
if (clen > 0 && UCD_CATEGORY(c) != ucp_M) |
| 1276 |
{ |
{ |
| 1277 |
const uschar *nptr = ptr + clen; |
const uschar *nptr = ptr + clen; |
| 1278 |
int ncount = 0; |
int ncount = 0; |
| 1287 |
int nd; |
int nd; |
| 1288 |
int ndlen = 1; |
int ndlen = 1; |
| 1289 |
GETCHARLEN(nd, nptr, ndlen); |
GETCHARLEN(nd, nptr, ndlen); |
| 1290 |
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break; |
if (UCD_CATEGORY(nd) != ucp_M) break; |
| 1291 |
ncount++; |
ncount++; |
| 1292 |
nptr += ndlen; |
nptr += ndlen; |
| 1293 |
} |
} |
| 1461 |
if (clen > 0) |
if (clen > 0) |
| 1462 |
{ |
{ |
| 1463 |
BOOL OK; |
BOOL OK; |
| 1464 |
int category = _pcre_ucp_findprop(c, &chartype, &script); |
const ucd_record * prop = GET_UCD(c); |
| 1465 |
switch(code[4]) |
switch(code[4]) |
| 1466 |
{ |
{ |
| 1467 |
case PT_ANY: |
case PT_ANY: |
| 1469 |
break; |
break; |
| 1470 |
|
|
| 1471 |
case PT_LAMP: |
case PT_LAMP: |
| 1472 |
OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt; |
OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt; |
| 1473 |
break; |
break; |
| 1474 |
|
|
| 1475 |
case PT_GC: |
case PT_GC: |
| 1476 |
OK = category == code[5]; |
OK = _pcre_ucp_gentype[prop->chartype] == code[5]; |
| 1477 |
break; |
break; |
| 1478 |
|
|
| 1479 |
case PT_PC: |
case PT_PC: |
| 1480 |
OK = chartype == code[5]; |
OK = prop->chartype == code[5]; |
| 1481 |
break; |
break; |
| 1482 |
|
|
| 1483 |
case PT_SC: |
case PT_SC: |
| 1484 |
OK = script == code[5]; |
OK = prop->script == code[5]; |
| 1485 |
break; |
break; |
| 1486 |
|
|
| 1487 |
/* Should never occur, but keep compilers from grumbling. */ |
/* Should never occur, but keep compilers from grumbling. */ |
| 1514 |
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) |
if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT) |
| 1515 |
{ ADD_ACTIVE(state_offset + 4, 0); } |
{ ADD_ACTIVE(state_offset + 4, 0); } |
| 1516 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
| 1517 |
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M) |
if (clen > 0 && UCD_CATEGORY(c) != ucp_M) |
| 1518 |
{ |
{ |
| 1519 |
const uschar *nptr = ptr + clen; |
const uschar *nptr = ptr + clen; |
| 1520 |
int ncount = 0; |
int ncount = 0; |
| 1528 |
int nd; |
int nd; |
| 1529 |
int ndlen = 1; |
int ndlen = 1; |
| 1530 |
GETCHARLEN(nd, nptr, ndlen); |
GETCHARLEN(nd, nptr, ndlen); |
| 1531 |
if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break; |
if (UCD_CATEGORY(nd) != ucp_M) break; |
| 1532 |
ncount++; |
ncount++; |
| 1533 |
nptr += ndlen; |
nptr += ndlen; |
| 1534 |
} |
} |
| 1708 |
other case of the character. */ |
other case of the character. */ |
| 1709 |
|
|
| 1710 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
| 1711 |
othercase = _pcre_ucp_othercase(c); |
othercase = UCD_OTHERCASE(c); |
| 1712 |
#else |
#else |
| 1713 |
othercase = NOTACHAR; |
othercase = NOTACHAR; |
| 1714 |
#endif |
#endif |
| 1733 |
to wait for them to pass before continuing. */ |
to wait for them to pass before continuing. */ |
| 1734 |
|
|
| 1735 |
case OP_EXTUNI: |
case OP_EXTUNI: |
| 1736 |
if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M) |
if (clen > 0 && UCD_CATEGORY(c) != ucp_M) |
| 1737 |
{ |
{ |
| 1738 |
const uschar *nptr = ptr + clen; |
const uschar *nptr = ptr + clen; |
| 1739 |
int ncount = 0; |
int ncount = 0; |
| 1741 |
{ |
{ |
| 1742 |
int nclen = 1; |
int nclen = 1; |
| 1743 |
GETCHARLEN(c, nptr, nclen); |
GETCHARLEN(c, nptr, nclen); |
| 1744 |
if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break; |
if (UCD_CATEGORY(c) != ucp_M) break; |
| 1745 |
ncount++; |
ncount++; |
| 1746 |
nptr += nclen; |
nptr += nclen; |
| 1747 |
} |
} |
| 1909 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 1910 |
{ |
{ |
| 1911 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
| 1912 |
otherd = _pcre_ucp_othercase(d); |
otherd = UCD_OTHERCASE(d); |
| 1913 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
| 1914 |
} |
} |
| 1915 |
else |
else |
| 1947 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 1948 |
{ |
{ |
| 1949 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
| 1950 |
otherd = _pcre_ucp_othercase(d); |
otherd = UCD_OTHERCASE(d); |
| 1951 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
| 1952 |
} |
} |
| 1953 |
else |
else |
| 1983 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 1984 |
{ |
{ |
| 1985 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
| 1986 |
otherd = _pcre_ucp_othercase(d); |
otherd = UCD_OTHERCASE(d); |
| 1987 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
| 1988 |
} |
} |
| 1989 |
else |
else |
| 2015 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 2016 |
{ |
{ |
| 2017 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
| 2018 |
otherd = _pcre_ucp_othercase(d); |
otherd = UCD_OTHERCASE(d); |
| 2019 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
| 2020 |
} |
} |
| 2021 |
else |
else |
| 2050 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 2051 |
{ |
{ |
| 2052 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
| 2053 |
otherd = _pcre_ucp_othercase(d); |
otherd = UCD_OTHERCASE(d); |
| 2054 |
#endif /* SUPPORT_UCP */ |
#endif /* SUPPORT_UCP */ |
| 2055 |
} |
} |
| 2056 |
else |
else |
| 2158 |
|
|
| 2159 |
/* ========================================================================== */ |
/* ========================================================================== */ |
| 2160 |
/* These are the opcodes for fancy brackets of various kinds. We have |
/* These are the opcodes for fancy brackets of various kinds. We have |
| 2161 |
to use recursion in order to handle them. The "always failing" assertion |
to use recursion in order to handle them. The "always failing" assertion |
| 2162 |
(?!) is optimised when compiling to OP_FAIL, so we have to support that, |
(?!) is optimised when compiling to OP_FAIL, so we have to support that, |
| 2163 |
though the other "backtracking verbs" are not supported. */ |
though the other "backtracking verbs" are not supported. */ |
| 2164 |
|
|
| 2165 |
case OP_FAIL: |
case OP_FAIL: |
| 2166 |
break; |
break; |
| 2167 |
|
|
| 2168 |
case OP_ASSERT: |
case OP_ASSERT: |
| 2169 |
case OP_ASSERT_NOT: |
case OP_ASSERT_NOT: |
| 2506 |
< -1 => some kind of unexpected problem |
< -1 => some kind of unexpected problem |
| 2507 |
*/ |
*/ |
| 2508 |
|
|
| 2509 |
PCRE_EXP_DEFN int |
PCRE_EXP_DEFN int PCRE_CALL_CONVENTION |
| 2510 |
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, |
pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data, |
| 2511 |
const char *subject, int length, int start_offset, int options, int *offsets, |
const char *subject, int length, int start_offset, int options, int *offsets, |
| 2512 |
int offsetcount, int *workspace, int wscount) |
int offsetcount, int *workspace, int wscount) |
| 2714 |
} |
} |
| 2715 |
|
|
| 2716 |
/* Call the main matching function, looping for a non-anchored regex after a |
/* Call the main matching function, looping for a non-anchored regex after a |
| 2717 |
failed match. Unless restarting, optimize by moving to the first match |
failed match. If not restarting, perform certain optimizations at the start of |
| 2718 |
character if possible, when not anchored. Then unless wanting a partial match, |
a match. */ |
|
check for a required later character. */ |
|
| 2719 |
|
|
| 2720 |
for (;;) |
for (;;) |
| 2721 |
{ |
{ |
| 2725 |
{ |
{ |
| 2726 |
const uschar *save_end_subject = end_subject; |
const uschar *save_end_subject = end_subject; |
| 2727 |
|
|
| 2728 |
/* Advance to a unique first char if possible. If firstline is TRUE, the |
/* If firstline is TRUE, the start of the match is constrained to the first |
| 2729 |
start of the match is constrained to the first line of a multiline string. |
line of a multiline string. Implement this by temporarily adjusting |
| 2730 |
Implement this by temporarily adjusting end_subject so that we stop |
end_subject so that we stop scanning at a newline. If the match fails at |
| 2731 |
scanning at a newline. If the match fails at the newline, later code breaks |
the newline, later code breaks this loop. */ |
|
this loop. */ |
|
| 2732 |
|
|
| 2733 |
if (firstline) |
if (firstline) |
| 2734 |
{ |
{ |
| 2735 |
const uschar *t = current_subject; |
USPTR t = current_subject; |
| 2736 |
|
#ifdef SUPPORT_UTF8 |
| 2737 |
|
if (utf8) |
| 2738 |
|
{ |
| 2739 |
|
while (t < md->end_subject && !IS_NEWLINE(t)) |
| 2740 |
|
{ |
| 2741 |
|
t++; |
| 2742 |
|
while (t < end_subject && (*t & 0xc0) == 0x80) t++; |
| 2743 |
|
} |
| 2744 |
|
} |
| 2745 |
|
else |
| 2746 |
|
#endif |
| 2747 |
while (t < md->end_subject && !IS_NEWLINE(t)) t++; |
while (t < md->end_subject && !IS_NEWLINE(t)) t++; |
| 2748 |
end_subject = t; |
end_subject = t; |
| 2749 |
} |
} |
| 2750 |
|
|
| 2751 |
if (first_byte >= 0) |
/* There are some optimizations that avoid running the match if a known |
| 2752 |
{ |
starting point is not found, or if a known later character is not present. |
| 2753 |
if (first_byte_caseless) |
However, there is an option that disables these, for testing and for |
| 2754 |
while (current_subject < end_subject && |
ensuring that all callouts do actually occur. */ |
| 2755 |
lcc[*current_subject] != first_byte) |
|
| 2756 |
current_subject++; |
if ((options & PCRE_NO_START_OPTIMIZE) == 0) |
| 2757 |
else |
{ |
| 2758 |
while (current_subject < end_subject && *current_subject != first_byte) |
|
| 2759 |
current_subject++; |
/* Advance to a known first byte. */ |
| 2760 |
} |
|
| 2761 |
|
if (first_byte >= 0) |
|
/* Or to just after a linebreak for a multiline match if possible */ |
|
|
|
|
|
else if (startline) |
|
|
{ |
|
|
if (current_subject > md->start_subject + start_offset) |
|
| 2762 |
{ |
{ |
| 2763 |
while (current_subject <= end_subject && !WAS_NEWLINE(current_subject)) |
if (first_byte_caseless) |
| 2764 |
current_subject++; |
while (current_subject < end_subject && |
| 2765 |
|
lcc[*current_subject] != first_byte) |
| 2766 |
/* If we have just passed a CR and the newline option is ANY or |
current_subject++; |
| 2767 |
ANYCRLF, and we are now at a LF, advance the match position by one more |
else |
| 2768 |
character. */ |
while (current_subject < end_subject && |
| 2769 |
|
*current_subject != first_byte) |
| 2770 |
if (current_subject[-1] == '\r' && |
current_subject++; |
|
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
|
|
current_subject < end_subject && |
|
|
*current_subject == '\n') |
|
|
current_subject++; |
|
| 2771 |
} |
} |
| 2772 |
} |
|
| 2773 |
|
/* Or to just after a linebreak for a multiline match if possible */ |
| 2774 |
/* Or to a non-unique first char after study */ |
|
| 2775 |
|
else if (startline) |
| 2776 |
else if (start_bits != NULL) |
{ |
| 2777 |
{ |
if (current_subject > md->start_subject + start_offset) |
| 2778 |
while (current_subject < end_subject) |
{ |
| 2779 |
|
#ifdef SUPPORT_UTF8 |
| 2780 |
|
if (utf8) |
| 2781 |
|
{ |
| 2782 |
|
while (current_subject < end_subject && |
| 2783 |
|
!WAS_NEWLINE(current_subject)) |
| 2784 |
|
{ |
| 2785 |
|
current_subject++; |
| 2786 |
|
while(current_subject < end_subject && |
| 2787 |
|
(*current_subject & 0xc0) == 0x80) |
| 2788 |
|
current_subject++; |
| 2789 |
|
} |
| 2790 |
|
} |
| 2791 |
|
else |
| 2792 |
|
#endif |
| 2793 |
|
while (current_subject < end_subject && !WAS_NEWLINE(current_subject)) |
| 2794 |
|
current_subject++; |
| 2795 |
|
|
| 2796 |
|
/* If we have just passed a CR and the newline option is ANY or |
| 2797 |
|
ANYCRLF, and we are now at a LF, advance the match position by one |
| 2798 |
|
more character. */ |
| 2799 |
|
|
| 2800 |
|
if (current_subject[-1] == '\r' && |
| 2801 |
|
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
| 2802 |
|
current_subject < end_subject && |
| 2803 |
|
*current_subject == '\n') |
| 2804 |
|
current_subject++; |
| 2805 |
|
} |
| 2806 |
|
} |
| 2807 |
|
|
| 2808 |
|
/* Or to a non-unique first char after study */ |
| 2809 |
|
|
| 2810 |
|
else if (start_bits != NULL) |
| 2811 |
{ |
{ |
| 2812 |
register unsigned int c = *current_subject; |
while (current_subject < end_subject) |
| 2813 |
if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++; |
{ |
| 2814 |
else break; |
register unsigned int c = *current_subject; |
| 2815 |
|
if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++; |
| 2816 |
|
else break; |
| 2817 |
|
} |
| 2818 |
} |
} |
| 2819 |
} |
} |
| 2820 |
|
|
| 2821 |
/* Restore fudged end_subject */ |
/* Restore fudged end_subject */ |
| 2822 |
|
|
| 2836 |
showed up when somebody was matching /^C/ on a 32-megabyte string... so we |
showed up when somebody was matching /^C/ on a 32-megabyte string... so we |
| 2837 |
don't do this when the string is sufficiently long. |
don't do this when the string is sufficiently long. |
| 2838 |
|
|
| 2839 |
ALSO: this processing is disabled when partial matching is requested. |
ALSO: this processing is disabled when partial matching is requested, and can |
| 2840 |
*/ |
also be explicitly deactivated. */ |
| 2841 |
|
|
| 2842 |
if (req_byte >= 0 && |
if ((options & PCRE_NO_START_OPTIMIZE) == 0 && |
| 2843 |
|
req_byte >= 0 && |
| 2844 |
end_subject - current_subject < REQ_BYTE_MAX && |
end_subject - current_subject < REQ_BYTE_MAX && |
| 2845 |
(options & PCRE_PARTIAL) == 0) |
(options & PCRE_PARTIAL) == 0) |
| 2846 |
{ |
{ |