| 1019 |
name it is given, it returns its number. Alternatively, if the name is NULL, it |
name it is given, it returns its number. Alternatively, if the name is NULL, it |
| 1020 |
returns when it reaches a given numbered subpattern. We know that if (?P< is |
returns when it reaches a given numbered subpattern. We know that if (?P< is |
| 1021 |
encountered, the name will be terminated by '>' because that is checked in the |
encountered, the name will be terminated by '>' because that is checked in the |
| 1022 |
first pass. Recursion is used to keep track of subpatterns that reset the |
first pass. Recursion is used to keep track of subpatterns that reset the |
| 1023 |
capturing group numbers - the (?| feature. |
capturing group numbers - the (?| feature. |
| 1024 |
|
|
| 1025 |
Arguments: |
Arguments: |
| 1028 |
name name to seek, or NULL if seeking a numbered subpattern |
name name to seek, or NULL if seeking a numbered subpattern |
| 1029 |
lorn name length, or subpattern number if name is NULL |
lorn name length, or subpattern number if name is NULL |
| 1030 |
xmode TRUE if we are in /x mode |
xmode TRUE if we are in /x mode |
| 1031 |
count pointer to the current capturing subpattern number (updated) |
count pointer to the current capturing subpattern number (updated) |
| 1032 |
|
|
| 1033 |
Returns: the number of the named subpattern, or -1 if not found |
Returns: the number of the named subpattern, or -1 if not found |
| 1034 |
*/ |
*/ |
| 1042 |
int hwm_count = start_count; |
int hwm_count = start_count; |
| 1043 |
BOOL dup_parens = FALSE; |
BOOL dup_parens = FALSE; |
| 1044 |
|
|
| 1045 |
/* If the first character is a parenthesis, check on the type of group we are |
/* If the first character is a parenthesis, check on the type of group we are |
| 1046 |
dealing with. The very first call may not start with a parenthesis. */ |
dealing with. The very first call may not start with a parenthesis. */ |
| 1047 |
|
|
| 1048 |
if (ptr[0] == CHAR_LEFT_PARENTHESIS) |
if (ptr[0] == CHAR_LEFT_PARENTHESIS) |
| 1049 |
{ |
{ |
| 1050 |
if (ptr[1] == CHAR_QUESTION_MARK && |
if (ptr[1] == CHAR_QUESTION_MARK && |
| 1051 |
ptr[2] == CHAR_VERTICAL_LINE) |
ptr[2] == CHAR_VERTICAL_LINE) |
| 1052 |
{ |
{ |
| 1053 |
ptr += 3; |
ptr += 3; |
| 1054 |
dup_parens = TRUE; |
dup_parens = TRUE; |
| 1055 |
} |
} |
| 1056 |
|
|
| 1057 |
/* Handle a normal, unnamed capturing parenthesis */ |
/* Handle a normal, unnamed capturing parenthesis */ |
| 1058 |
|
|
| 1059 |
else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK) |
else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK) |
| 1060 |
{ |
{ |
| 1061 |
*count += 1; |
*count += 1; |
| 1062 |
if (name == NULL && *count == lorn) return *count; |
if (name == NULL && *count == lorn) return *count; |
| 1063 |
ptr++; |
ptr++; |
| 1064 |
} |
} |
| 1065 |
|
|
| 1066 |
/* Handle a condition. If it is an assertion, just carry on so that it |
/* Handle a condition. If it is an assertion, just carry on so that it |
| 1067 |
is processed as normal. If not, skip to the closing parenthesis of the |
is processed as normal. If not, skip to the closing parenthesis of the |
| 1068 |
condition (there can't be any nested parens). */ |
condition (there can't be any nested parens). */ |
| 1069 |
|
|
| 1070 |
else if (ptr[2] == CHAR_LEFT_PARENTHESIS) |
else if (ptr[2] == CHAR_LEFT_PARENTHESIS) |
| 1071 |
{ |
{ |
| 1072 |
ptr += 2; |
ptr += 2; |
| 1073 |
if (ptr[1] != CHAR_QUESTION_MARK) |
if (ptr[1] != CHAR_QUESTION_MARK) |
| 1074 |
{ |
{ |
| 1075 |
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
| 1076 |
if (*ptr != 0) ptr++; |
if (*ptr != 0) ptr++; |
| 1077 |
} |
} |
| 1078 |
} |
} |
| 1079 |
|
|
| 1080 |
/* We have either (? or (* and not a condition */ |
/* We have either (? or (* and not a condition */ |
| 1081 |
|
|
| 1082 |
else |
else |
| 1083 |
{ |
{ |
| 1084 |
ptr += 2; |
ptr += 2; |
| 1085 |
if (*ptr == CHAR_P) ptr++; /* Allow optional P */ |
if (*ptr == CHAR_P) ptr++; /* Allow optional P */ |
| 1086 |
|
|
| 1087 |
/* We have to disambiguate (?<! and (?<= from (?<name> for named groups */ |
/* We have to disambiguate (?<! and (?<= from (?<name> for named groups */ |
| 1088 |
|
|
| 1089 |
if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK && |
if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK && |
| 1090 |
ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE) |
ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE) |
| 1091 |
{ |
{ |
| 1100 |
if (name != NULL && lorn == ptr - thisname && |
if (name != NULL && lorn == ptr - thisname && |
| 1101 |
strncmp((const char *)name, (const char *)thisname, lorn) == 0) |
strncmp((const char *)name, (const char *)thisname, lorn) == 0) |
| 1102 |
return *count; |
return *count; |
| 1103 |
} |
} |
| 1104 |
} |
} |
| 1105 |
} |
} |
| 1106 |
|
|
| 1107 |
/* Past any initial parenthesis handling, scan for parentheses or vertical |
/* Past any initial parenthesis handling, scan for parentheses or vertical |
| 1108 |
bars. */ |
bars. */ |
| 1109 |
|
|
| 1110 |
for (; *ptr != 0; ptr++) |
for (; *ptr != 0; ptr++) |
| 1185 |
} |
} |
| 1186 |
|
|
| 1187 |
/* Check for the special metacharacters */ |
/* Check for the special metacharacters */ |
| 1188 |
|
|
| 1189 |
if (*ptr == CHAR_LEFT_PARENTHESIS) |
if (*ptr == CHAR_LEFT_PARENTHESIS) |
| 1190 |
{ |
{ |
| 1191 |
int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count); |
int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count); |
| 1192 |
if (rc > 0) return rc; |
if (rc > 0) return rc; |
| 1193 |
if (*ptr == 0) goto FAIL_EXIT; |
if (*ptr == 0) goto FAIL_EXIT; |
| 1194 |
} |
} |
| 1195 |
|
|
| 1196 |
else if (*ptr == CHAR_RIGHT_PARENTHESIS) |
else if (*ptr == CHAR_RIGHT_PARENTHESIS) |
| 1197 |
{ |
{ |
| 1198 |
if (dup_parens && *count < hwm_count) *count = hwm_count; |
if (dup_parens && *count < hwm_count) *count = hwm_count; |
| 1199 |
*ptrptr = ptr; |
*ptrptr = ptr; |
| 1200 |
return -1; |
return -1; |
| 1201 |
} |
} |
| 1202 |
|
|
| 1203 |
else if (*ptr == CHAR_VERTICAL_LINE && dup_parens) |
else if (*ptr == CHAR_VERTICAL_LINE && dup_parens) |
| 1204 |
{ |
{ |
| 1205 |
if (*count > hwm_count) hwm_count = *count; |
if (*count > hwm_count) hwm_count = *count; |
| 1206 |
*count = start_count; |
*count = start_count; |
| 1207 |
} |
} |
| 1208 |
} |
} |
| 1209 |
|
|
| 1210 |
FAIL_EXIT: |
FAIL_EXIT: |
| 1251 |
does start with a parenthesis, find_parens_sub() will return when it hits the |
does start with a parenthesis, find_parens_sub() will return when it hits the |
| 1252 |
matching closing parens. That is why we have to have a loop. */ |
matching closing parens. That is why we have to have a loop. */ |
| 1253 |
|
|
| 1254 |
for (;;) |
for (;;) |
| 1255 |
{ |
{ |
| 1256 |
rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count); |
rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count); |
| 1257 |
if (rc > 0 || *ptr++ == 0) break; |
if (rc > 0 || *ptr++ == 0) break; |
| 1258 |
} |
} |
| 1259 |
|
|
| 1260 |
return rc; |
return rc; |
| 1261 |
} |
} |
| 1262 |
|
|
| 6226 |
|
|
| 6227 |
*erroroffset = 0; |
*erroroffset = 0; |
| 6228 |
|
|
|
/* Can't support UTF8 unless PCRE has been compiled to include the code. */ |
|
|
|
|
|
#ifdef SUPPORT_UTF8 |
|
|
utf8 = (options & PCRE_UTF8) != 0; |
|
|
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && |
|
|
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0) |
|
|
{ |
|
|
errorcode = ERR44; |
|
|
goto PCRE_EARLY_ERROR_RETURN2; |
|
|
} |
|
|
#else |
|
|
if ((options & PCRE_UTF8) != 0) |
|
|
{ |
|
|
errorcode = ERR32; |
|
|
goto PCRE_EARLY_ERROR_RETURN; |
|
|
} |
|
|
#endif |
|
|
|
|
|
if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) |
|
|
{ |
|
|
errorcode = ERR17; |
|
|
goto PCRE_EARLY_ERROR_RETURN; |
|
|
} |
|
|
|
|
| 6229 |
/* Set up pointers to the individual character tables */ |
/* Set up pointers to the individual character tables */ |
| 6230 |
|
|
| 6231 |
if (tables == NULL) tables = _pcre_default_tables; |
if (tables == NULL) tables = _pcre_default_tables; |
| 6234 |
cd->cbits = tables + cbits_offset; |
cd->cbits = tables + cbits_offset; |
| 6235 |
cd->ctypes = tables + ctypes_offset; |
cd->ctypes = tables + ctypes_offset; |
| 6236 |
|
|
| 6237 |
|
/* Check that all undefined public option bits are zero */ |
| 6238 |
|
|
| 6239 |
|
if ((options & ~PUBLIC_COMPILE_OPTIONS) != 0) |
| 6240 |
|
{ |
| 6241 |
|
errorcode = ERR17; |
| 6242 |
|
goto PCRE_EARLY_ERROR_RETURN; |
| 6243 |
|
} |
| 6244 |
|
|
| 6245 |
/* Check for global one-time settings at the start of the pattern, and remember |
/* Check for global one-time settings at the start of the pattern, and remember |
| 6246 |
the offset for later. */ |
the offset for later. */ |
| 6247 |
|
|
| 6251 |
int newnl = 0; |
int newnl = 0; |
| 6252 |
int newbsr = 0; |
int newbsr = 0; |
| 6253 |
|
|
| 6254 |
|
if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0) |
| 6255 |
|
{ skipatstart += 7; options |= PCRE_UTF8; continue; } |
| 6256 |
|
|
| 6257 |
if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0) |
if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0) |
| 6258 |
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; } |
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; } |
| 6259 |
else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3) == 0) |
else if (strncmp((char *)(ptr+skipatstart+2), STRING_LF_RIGHTPAR, 3) == 0) |
| 6277 |
else break; |
else break; |
| 6278 |
} |
} |
| 6279 |
|
|
| 6280 |
|
/* Can't support UTF8 unless PCRE has been compiled to include the code. */ |
| 6281 |
|
|
| 6282 |
|
#ifdef SUPPORT_UTF8 |
| 6283 |
|
utf8 = (options & PCRE_UTF8) != 0; |
| 6284 |
|
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && |
| 6285 |
|
(*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0) |
| 6286 |
|
{ |
| 6287 |
|
errorcode = ERR44; |
| 6288 |
|
goto PCRE_EARLY_ERROR_RETURN2; |
| 6289 |
|
} |
| 6290 |
|
#else |
| 6291 |
|
if ((options & PCRE_UTF8) != 0) |
| 6292 |
|
{ |
| 6293 |
|
errorcode = ERR32; |
| 6294 |
|
goto PCRE_EARLY_ERROR_RETURN; |
| 6295 |
|
} |
| 6296 |
|
#endif |
| 6297 |
|
|
| 6298 |
/* Check validity of \R options. */ |
/* Check validity of \R options. */ |
| 6299 |
|
|
| 6300 |
switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) |
switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) |