| 2975 |
} |
} |
| 2976 |
|
|
| 2977 |
oldptr = ptr; |
oldptr = ptr; |
| 2978 |
|
|
| 2979 |
/* Remember \r or \n */ |
/* Remember \r or \n */ |
| 2980 |
|
|
| 2981 |
if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF; |
if (c == '\r' || c == '\n') cd->external_flags |= PCRE_HASCRORLF; |
| 2982 |
|
|
| 2983 |
/* Check for range */ |
/* Check for range */ |
| 2984 |
|
|
| 2985 |
if (!inescq && ptr[1] == '-') |
if (!inescq && ptr[1] == '-') |
| 3050 |
if (d == c) goto LONE_SINGLE_CHARACTER; /* A few lines below */ |
if (d == c) goto LONE_SINGLE_CHARACTER; /* A few lines below */ |
| 3051 |
|
|
| 3052 |
/* Remember \r or \n */ |
/* Remember \r or \n */ |
| 3053 |
|
|
| 3054 |
if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF; |
if (d == '\r' || d == '\n') cd->external_flags |= PCRE_HASCRORLF; |
| 3055 |
|
|
| 3056 |
/* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless |
/* In UTF-8 mode, if the upper limit is > 255, or > 127 for caseless |
| 3057 |
matching, we have to use an XCLASS with extra data items. Caseless |
matching, we have to use an XCLASS with extra data items. Caseless |
| 3058 |
matching for characters > 127 is available only if UCP support is |
matching for characters > 127 is available only if UCP support is |
| 3157 |
apparent range that isn't. */ |
apparent range that isn't. */ |
| 3158 |
|
|
| 3159 |
LONE_SINGLE_CHARACTER: |
LONE_SINGLE_CHARACTER: |
| 3160 |
|
|
| 3161 |
/* Handle a character that cannot go in the bit map */ |
/* Handle a character that cannot go in the bit map */ |
| 3162 |
|
|
| 3163 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 3205 |
*errorcodeptr = ERR6; |
*errorcodeptr = ERR6; |
| 3206 |
goto FAILED; |
goto FAILED; |
| 3207 |
} |
} |
| 3208 |
|
|
| 3209 |
|
|
| 3210 |
/* This code has been disabled because it would mean that \s counts as |
/* This code has been disabled because it would mean that \s counts as |
| 3211 |
an explicit \r or \n reference, and that's not really what is wanted. Now |
an explicit \r or \n reference, and that's not really what is wanted. Now |
| 3212 |
we set the flag only if there is a literal "\r" or "\n" in the class. */ |
we set the flag only if there is a literal "\r" or "\n" in the class. */ |
| 3223 |
if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF; |
if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF; |
| 3224 |
} |
} |
| 3225 |
#endif |
#endif |
| 3226 |
|
|
| 3227 |
|
|
| 3228 |
/* If class_charcount is 1, we saw precisely one character whose value is |
/* If class_charcount is 1, we saw precisely one character whose value is |
| 3229 |
less than 256. As long as there were no characters >= 128 and there was no |
less than 256. As long as there were no characters >= 128 and there was no |
| 5763 |
cd->cbits = tables + cbits_offset; |
cd->cbits = tables + cbits_offset; |
| 5764 |
cd->ctypes = tables + ctypes_offset; |
cd->ctypes = tables + ctypes_offset; |
| 5765 |
|
|
| 5766 |
/* Check for newline settings at the start of the pattern, and remember the |
/* Check for global one-time settings at the start of the pattern, and remember |
| 5767 |
offset for later. */ |
the offset for later. */ |
| 5768 |
|
|
| 5769 |
if (ptr[0] == '(' && ptr[1] == '*') |
while (ptr[skipatstart] == '(' && ptr[skipatstart+1] == '*') |
| 5770 |
{ |
{ |
| 5771 |
int newnl = 0; |
int newnl = 0; |
| 5772 |
if (strncmp((char *)(ptr+2), "CR)", 3) == 0) |
int newbsr = 0; |
| 5773 |
{ skipatstart = 5; newnl = PCRE_NEWLINE_CR; } |
|
| 5774 |
else if (strncmp((char *)(ptr+2), "LF)", 3) == 0) |
if (strncmp((char *)(ptr+skipatstart+2), "CR)", 3) == 0) |
| 5775 |
{ skipatstart = 5; newnl = PCRE_NEWLINE_LF; } |
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; } |
| 5776 |
else if (strncmp((char *)(ptr+2), "CRLF)", 5) == 0) |
else if (strncmp((char *)(ptr+skipatstart+2), "LF)", 3) == 0) |
| 5777 |
{ skipatstart = 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; } |
{ skipatstart += 5; newnl = PCRE_NEWLINE_LF; } |
| 5778 |
else if (strncmp((char *)(ptr+2), "ANY)", 4) == 0) |
else if (strncmp((char *)(ptr+skipatstart+2), "CRLF)", 5) == 0) |
| 5779 |
{ skipatstart = 6; newnl = PCRE_NEWLINE_ANY; } |
{ skipatstart += 7; newnl = PCRE_NEWLINE_CR + PCRE_NEWLINE_LF; } |
| 5780 |
else if (strncmp((char *)(ptr+2), "ANYCRLF)", 8) == 0) |
else if (strncmp((char *)(ptr+skipatstart+2), "ANY)", 4) == 0) |
| 5781 |
{ skipatstart = 10; newnl = PCRE_NEWLINE_ANYCRLF; } |
{ skipatstart += 6; newnl = PCRE_NEWLINE_ANY; } |
| 5782 |
if (skipatstart > 0) |
else if (strncmp((char *)(ptr+skipatstart+2), "ANYCRLF)", 8) == 0) |
| 5783 |
|
{ skipatstart += 10; newnl = PCRE_NEWLINE_ANYCRLF; } |
| 5784 |
|
|
| 5785 |
|
else if (strncmp((char *)(ptr+skipatstart+2), "BSR_ANYCRLF)", 12) == 0) |
| 5786 |
|
{ skipatstart += 14; newbsr = PCRE_BSR_ANYCRLF; } |
| 5787 |
|
else if (strncmp((char *)(ptr+skipatstart+2), "BSR_UNICODE)", 12) == 0) |
| 5788 |
|
{ skipatstart += 14; newbsr = PCRE_BSR_UNICODE; } |
| 5789 |
|
|
| 5790 |
|
if (newnl != 0) |
| 5791 |
options = (options & ~PCRE_NEWLINE_BITS) | newnl; |
options = (options & ~PCRE_NEWLINE_BITS) | newnl; |
| 5792 |
|
else if (newbsr != 0) |
| 5793 |
|
options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr; |
| 5794 |
|
else break; |
| 5795 |
|
} |
| 5796 |
|
|
| 5797 |
|
/* Check validity of \R options. */ |
| 5798 |
|
|
| 5799 |
|
switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) |
| 5800 |
|
{ |
| 5801 |
|
case 0: |
| 5802 |
|
case PCRE_BSR_ANYCRLF: |
| 5803 |
|
case PCRE_BSR_UNICODE: |
| 5804 |
|
break; |
| 5805 |
|
default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN; |
| 5806 |
} |
} |
| 5807 |
|
|
| 5808 |
/* Handle different types of newline. The three bits give seven cases. The |
/* Handle different types of newline. The three bits give seven cases. The |
| 5914 |
goto PCRE_EARLY_ERROR_RETURN; |
goto PCRE_EARLY_ERROR_RETURN; |
| 5915 |
} |
} |
| 5916 |
|
|
| 5917 |
/* Put in the magic number, and save the sizes, initial options, internal |
/* Put in the magic number, and save the sizes, initial options, internal |
| 5918 |
flags, and character table pointer. NULL is used for the default character |
flags, and character table pointer. NULL is used for the default character |
| 5919 |
tables. The nullpad field is at the end; it's there to help in the case when a |
tables. The nullpad field is at the end; it's there to help in the case when a |
| 5920 |
regex compiled on a system with 4-byte pointers is run on another with 8-byte |
regex compiled on a system with 4-byte pointers is run on another with 8-byte |