| 3 |
*************************************************/ |
*************************************************/ |
| 4 |
|
|
| 5 |
/* PCRE is a library of functions to support regular expressions whose syntax |
/* PCRE is a library of functions to support regular expressions whose syntax |
| 6 |
and semantics are as close as possible to those of the Perl 5 language (but see |
and semantics are as close as possible to those of the Perl 5 language (but see |
| 7 |
below for why this module is different). |
below for why this module is different). |
| 8 |
|
|
| 9 |
Written by Philip Hazel |
Written by Philip Hazel |
| 2747 |
while (t < md->end_subject && !IS_NEWLINE(t)) t++; |
while (t < md->end_subject && !IS_NEWLINE(t)) t++; |
| 2748 |
end_subject = t; |
end_subject = t; |
| 2749 |
} |
} |
| 2750 |
|
|
| 2751 |
/* There are some optimizations that avoid running the match if a known |
/* There are some optimizations that avoid running the match if a known |
| 2752 |
starting point is not found, or if a known later character is not present. |
starting point is not found, or if a known later character is not present. |
| 2753 |
However, there is an option that disables these, for testing and for |
However, there is an option that disables these, for testing and for |
| 2754 |
ensuring that all callouts do actually occur. */ |
ensuring that all callouts do actually occur. */ |
| 2755 |
|
|
| 2756 |
if ((options & PCRE_NO_START_OPTIMIZE) == 0) |
if ((options & PCRE_NO_START_OPTIMIZE) == 0) |
| 2757 |
{ |
{ |
| 2758 |
|
|
| 2759 |
/* Advance to a known first byte. */ |
/* Advance to a known first byte. */ |
| 2760 |
|
|
| 2761 |
if (first_byte >= 0) |
if (first_byte >= 0) |
| 2762 |
{ |
{ |
| 2763 |
if (first_byte_caseless) |
if (first_byte_caseless) |
| 2765 |
lcc[*current_subject] != first_byte) |
lcc[*current_subject] != first_byte) |
| 2766 |
current_subject++; |
current_subject++; |
| 2767 |
else |
else |
| 2768 |
while (current_subject < end_subject && |
while (current_subject < end_subject && |
| 2769 |
*current_subject != first_byte) |
*current_subject != first_byte) |
| 2770 |
current_subject++; |
current_subject++; |
| 2771 |
} |
} |
| 2772 |
|
|
| 2773 |
/* Or to just after a linebreak for a multiline match if possible */ |
/* Or to just after a linebreak for a multiline match if possible */ |
| 2774 |
|
|
| 2775 |
else if (startline) |
else if (startline) |
| 2776 |
{ |
{ |
| 2777 |
if (current_subject > md->start_subject + start_offset) |
if (current_subject > md->start_subject + start_offset) |
| 2779 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2780 |
if (utf8) |
if (utf8) |
| 2781 |
{ |
{ |
| 2782 |
while (current_subject < end_subject && |
while (current_subject < end_subject && |
| 2783 |
!WAS_NEWLINE(current_subject)) |
!WAS_NEWLINE(current_subject)) |
| 2784 |
{ |
{ |
| 2785 |
current_subject++; |
current_subject++; |
| 2792 |
#endif |
#endif |
| 2793 |
while (current_subject < end_subject && !WAS_NEWLINE(current_subject)) |
while (current_subject < end_subject && !WAS_NEWLINE(current_subject)) |
| 2794 |
current_subject++; |
current_subject++; |
| 2795 |
|
|
| 2796 |
/* If we have just passed a CR and the newline option is ANY or |
/* If we have just passed a CR and the newline option is ANY or |
| 2797 |
ANYCRLF, and we are now at a LF, advance the match position by one |
ANYCRLF, and we are now at a LF, advance the match position by one |
| 2798 |
more character. */ |
more character. */ |
| 2799 |
|
|
| 2800 |
if (current_subject[-1] == CHAR_CR && |
if (current_subject[-1] == CHAR_CR && |
| 2801 |
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
| 2802 |
current_subject < end_subject && |
current_subject < end_subject && |
| 2804 |
current_subject++; |
current_subject++; |
| 2805 |
} |
} |
| 2806 |
} |
} |
| 2807 |
|
|
| 2808 |
/* Or to a non-unique first char after study */ |
/* Or to a non-unique first char after study */ |
| 2809 |
|
|
| 2810 |
else if (start_bits != NULL) |
else if (start_bits != NULL) |
| 2811 |
{ |
{ |
| 2812 |
while (current_subject < end_subject) |
while (current_subject < end_subject) |
| 2816 |
else break; |
else break; |
| 2817 |
} |
} |
| 2818 |
} |
} |
| 2819 |
} |
} |
| 2820 |
|
|
| 2821 |
/* Restore fudged end_subject */ |
/* Restore fudged end_subject */ |
| 2822 |
|
|
| 2836 |
showed up when somebody was matching /^C/ on a 32-megabyte string... so we |
showed up when somebody was matching /^C/ on a 32-megabyte string... so we |
| 2837 |
don't do this when the string is sufficiently long. |
don't do this when the string is sufficiently long. |
| 2838 |
|
|
| 2839 |
ALSO: this processing is disabled when partial matching is requested, and can |
ALSO: this processing is disabled when partial matching is requested, and can |
| 2840 |
also be explicitly deactivated. */ |
also be explicitly deactivated. */ |
| 2841 |
|
|
| 2842 |
if ((options & PCRE_NO_START_OPTIMIZE) == 0 && |
if ((options & PCRE_NO_START_OPTIMIZE) == 0 && |