| 2714 |
} |
} |
| 2715 |
|
|
| 2716 |
/* Call the main matching function, looping for a non-anchored regex after a |
/* Call the main matching function, looping for a non-anchored regex after a |
| 2717 |
failed match. Unless restarting, optimize by moving to the first match |
failed match. If not restarting, perform certain optimizations at the start of |
| 2718 |
character if possible, when not anchored. Then unless wanting a partial match, |
a match. */ |
|
check for a required later character. */ |
|
| 2719 |
|
|
| 2720 |
for (;;) |
for (;;) |
| 2721 |
{ |
{ |
| 2725 |
{ |
{ |
| 2726 |
const uschar *save_end_subject = end_subject; |
const uschar *save_end_subject = end_subject; |
| 2727 |
|
|
| 2728 |
/* Advance to a unique first char if possible. If firstline is TRUE, the |
/* If firstline is TRUE, the start of the match is constrained to the first |
| 2729 |
start of the match is constrained to the first line of a multiline string. |
line of a multiline string. Implement this by temporarily adjusting |
| 2730 |
Implement this by temporarily adjusting end_subject so that we stop |
end_subject so that we stop scanning at a newline. If the match fails at |
| 2731 |
scanning at a newline. If the match fails at the newline, later code breaks |
the newline, later code breaks this loop. */ |
|
this loop. */ |
|
| 2732 |
|
|
| 2733 |
if (firstline) |
if (firstline) |
| 2734 |
{ |
{ |
| 2747 |
while (t < md->end_subject && !IS_NEWLINE(t)) t++; |
while (t < md->end_subject && !IS_NEWLINE(t)) t++; |
| 2748 |
end_subject = t; |
end_subject = t; |
| 2749 |
} |
} |
| 2750 |
|
|
| 2751 |
if (first_byte >= 0) |
/* There are some optimizations that avoid running the match if a known |
| 2752 |
{ |
starting point is not found, or if a known later character is not present. |
| 2753 |
if (first_byte_caseless) |
However, there is an option that disables these, for testing and for |
| 2754 |
while (current_subject < end_subject && |
ensuring that all callouts do actually occur. */ |
| 2755 |
lcc[*current_subject] != first_byte) |
|
| 2756 |
current_subject++; |
if ((options & PCRE_NO_START_OPTIMIZE) == 0) |
| 2757 |
else |
{ |
| 2758 |
while (current_subject < end_subject && *current_subject != first_byte) |
|
| 2759 |
current_subject++; |
/* Advance to a known first byte. */ |
| 2760 |
} |
|
| 2761 |
|
if (first_byte >= 0) |
|
/* Or to just after a linebreak for a multiline match if possible */ |
|
|
|
|
|
else if (startline) |
|
|
{ |
|
|
if (current_subject > md->start_subject + start_offset) |
|
| 2762 |
{ |
{ |
| 2763 |
#ifdef SUPPORT_UTF8 |
if (first_byte_caseless) |
| 2764 |
if (utf8) |
while (current_subject < end_subject && |
| 2765 |
|
lcc[*current_subject] != first_byte) |
| 2766 |
|
current_subject++; |
| 2767 |
|
else |
| 2768 |
|
while (current_subject < end_subject && |
| 2769 |
|
*current_subject != first_byte) |
| 2770 |
|
current_subject++; |
| 2771 |
|
} |
| 2772 |
|
|
| 2773 |
|
/* Or to just after a linebreak for a multiline match if possible */ |
| 2774 |
|
|
| 2775 |
|
else if (startline) |
| 2776 |
|
{ |
| 2777 |
|
if (current_subject > md->start_subject + start_offset) |
| 2778 |
{ |
{ |
| 2779 |
while (current_subject < end_subject && !WAS_NEWLINE(current_subject)) |
#ifdef SUPPORT_UTF8 |
| 2780 |
|
if (utf8) |
| 2781 |
{ |
{ |
| 2782 |
current_subject++; |
while (current_subject < end_subject && |
| 2783 |
while(current_subject < end_subject && |
!WAS_NEWLINE(current_subject)) |
| 2784 |
(*current_subject & 0xc0) == 0x80) |
{ |
| 2785 |
current_subject++; |
current_subject++; |
| 2786 |
|
while(current_subject < end_subject && |
| 2787 |
|
(*current_subject & 0xc0) == 0x80) |
| 2788 |
|
current_subject++; |
| 2789 |
|
} |
| 2790 |
} |
} |
| 2791 |
} |
else |
|
else |
|
| 2792 |
#endif |
#endif |
| 2793 |
while (current_subject < end_subject && !WAS_NEWLINE(current_subject)) |
while (current_subject < end_subject && !WAS_NEWLINE(current_subject)) |
| 2794 |
current_subject++; |
current_subject++; |
| 2795 |
|
|
| 2796 |
/* If we have just passed a CR and the newline option is ANY or |
/* If we have just passed a CR and the newline option is ANY or |
| 2797 |
ANYCRLF, and we are now at a LF, advance the match position by one more |
ANYCRLF, and we are now at a LF, advance the match position by one |
| 2798 |
character. */ |
more character. */ |
| 2799 |
|
|
| 2800 |
if (current_subject[-1] == '\r' && |
if (current_subject[-1] == '\r' && |
| 2801 |
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
(md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) && |
| 2802 |
current_subject < end_subject && |
current_subject < end_subject && |
| 2803 |
*current_subject == '\n') |
*current_subject == '\n') |
| 2804 |
current_subject++; |
current_subject++; |
| 2805 |
|
} |
| 2806 |
} |
} |
| 2807 |
} |
|
| 2808 |
|
/* Or to a non-unique first char after study */ |
| 2809 |
/* Or to a non-unique first char after study */ |
|
| 2810 |
|
else if (start_bits != NULL) |
|
else if (start_bits != NULL) |
|
|
{ |
|
|
while (current_subject < end_subject) |
|
| 2811 |
{ |
{ |
| 2812 |
register unsigned int c = *current_subject; |
while (current_subject < end_subject) |
| 2813 |
if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++; |
{ |
| 2814 |
else break; |
register unsigned int c = *current_subject; |
| 2815 |
|
if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++; |
| 2816 |
|
else break; |
| 2817 |
|
} |
| 2818 |
} |
} |
| 2819 |
} |
} |
| 2820 |
|
|
| 2821 |
/* Restore fudged end_subject */ |
/* Restore fudged end_subject */ |
| 2822 |
|
|
| 2836 |
showed up when somebody was matching /^C/ on a 32-megabyte string... so we |
showed up when somebody was matching /^C/ on a 32-megabyte string... so we |
| 2837 |
don't do this when the string is sufficiently long. |
don't do this when the string is sufficiently long. |
| 2838 |
|
|
| 2839 |
ALSO: this processing is disabled when partial matching is requested. |
ALSO: this processing is disabled when partial matching is requested, and can |
| 2840 |
*/ |
also be explicitly deactivated. */ |
| 2841 |
|
|
| 2842 |
if (req_byte >= 0 && |
if ((options & PCRE_NO_START_OPTIMIZE) == 0 && |
| 2843 |
|
req_byte >= 0 && |
| 2844 |
end_subject - current_subject < REQ_BYTE_MAX && |
end_subject - current_subject < REQ_BYTE_MAX && |
| 2845 |
(options & PCRE_PARTIAL) == 0) |
(options & PCRE_PARTIAL) == 0) |
| 2846 |
{ |
{ |