| 1009 |
|
|
| 1010 |
|
|
| 1011 |
/************************************************* |
/************************************************* |
| 1012 |
* Find forward referenced subpattern * |
* Subroutine for finding forward reference * |
| 1013 |
*************************************************/ |
*************************************************/ |
| 1014 |
|
|
| 1015 |
/* This function scans along a pattern's text looking for capturing |
/* This recursive function is called only from find_parens() below. The |
| 1016 |
|
top-level call starts at the beginning of the pattern. All other calls must |
| 1017 |
|
start at a parenthesis. It scans along a pattern's text looking for capturing |
| 1018 |
subpatterns, and counting them. If it finds a named pattern that matches the |
subpatterns, and counting them. If it finds a named pattern that matches the |
| 1019 |
name it is given, it returns its number. Alternatively, if the name is NULL, it |
name it is given, it returns its number. Alternatively, if the name is NULL, it |
| 1020 |
returns when it reaches a given numbered subpattern. This is used for forward |
returns when it reaches a given numbered subpattern. We know that if (?P< is |
| 1021 |
references to subpatterns. We know that if (?P< is encountered, the name will |
encountered, the name will be terminated by '>' because that is checked in the |
| 1022 |
be terminated by '>' because that is checked in the first pass. |
first pass. Recursion is used to keep track of subpatterns that reset the |
| 1023 |
|
capturing group numbers - the (?| feature. |
| 1024 |
|
|
| 1025 |
Arguments: |
Arguments: |
| 1026 |
ptr current position in the pattern |
ptrptr address of the current character pointer (updated) |
| 1027 |
cd compile background data |
cd compile background data |
| 1028 |
name name to seek, or NULL if seeking a numbered subpattern |
name name to seek, or NULL if seeking a numbered subpattern |
| 1029 |
lorn name length, or subpattern number if name is NULL |
lorn name length, or subpattern number if name is NULL |
| 1030 |
xmode TRUE if we are in /x mode |
xmode TRUE if we are in /x mode |
| 1031 |
|
count pointer to the current capturing subpattern number (updated) |
| 1032 |
|
|
| 1033 |
Returns: the number of the named subpattern, or -1 if not found |
Returns: the number of the named subpattern, or -1 if not found |
| 1034 |
*/ |
*/ |
| 1035 |
|
|
| 1036 |
static int |
static int |
| 1037 |
find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn, |
find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn, |
| 1038 |
BOOL xmode) |
BOOL xmode, int *count) |
| 1039 |
{ |
{ |
| 1040 |
const uschar *thisname; |
uschar *ptr = *ptrptr; |
| 1041 |
int count = cd->bracount; |
int start_count = *count; |
| 1042 |
|
int hwm_count = start_count; |
| 1043 |
|
BOOL dup_parens = FALSE; |
| 1044 |
|
|
| 1045 |
for (; *ptr != 0; ptr++) |
/* If the first character is a parenthesis, check on the type of group we are |
| 1046 |
|
dealing with. The very first call may not start with a parenthesis. */ |
| 1047 |
|
|
| 1048 |
|
if (ptr[0] == CHAR_LEFT_PARENTHESIS) |
| 1049 |
{ |
{ |
| 1050 |
int term; |
if (ptr[1] == CHAR_QUESTION_MARK && |
| 1051 |
|
ptr[2] == CHAR_VERTICAL_LINE) |
| 1052 |
|
{ |
| 1053 |
|
ptr += 3; |
| 1054 |
|
dup_parens = TRUE; |
| 1055 |
|
} |
| 1056 |
|
|
| 1057 |
|
/* Handle a normal, unnamed capturing parenthesis */ |
| 1058 |
|
|
| 1059 |
|
else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK) |
| 1060 |
|
{ |
| 1061 |
|
*count += 1; |
| 1062 |
|
if (name == NULL && *count == lorn) return *count; |
| 1063 |
|
ptr++; |
| 1064 |
|
} |
| 1065 |
|
|
| 1066 |
|
/* Handle a condition. If it is an assertion, just carry on so that it |
| 1067 |
|
is processed as normal. If not, skip to the closing parenthesis of the |
| 1068 |
|
condition (there can't be any nested parens). */ |
| 1069 |
|
|
| 1070 |
|
else if (ptr[2] == CHAR_LEFT_PARENTHESIS) |
| 1071 |
|
{ |
| 1072 |
|
ptr += 2; |
| 1073 |
|
if (ptr[1] != CHAR_QUESTION_MARK) |
| 1074 |
|
{ |
| 1075 |
|
while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++; |
| 1076 |
|
if (*ptr != 0) ptr++; |
| 1077 |
|
} |
| 1078 |
|
} |
| 1079 |
|
|
| 1080 |
|
/* We have either (? or (* and not a condition */ |
| 1081 |
|
|
| 1082 |
|
else |
| 1083 |
|
{ |
| 1084 |
|
ptr += 2; |
| 1085 |
|
if (*ptr == CHAR_P) ptr++; /* Allow optional P */ |
| 1086 |
|
|
| 1087 |
|
/* We have to disambiguate (?<! and (?<= from (?<name> for named groups */ |
| 1088 |
|
|
| 1089 |
|
if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK && |
| 1090 |
|
ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE) |
| 1091 |
|
{ |
| 1092 |
|
int term; |
| 1093 |
|
const uschar *thisname; |
| 1094 |
|
*count += 1; |
| 1095 |
|
if (name == NULL && *count == lorn) return *count; |
| 1096 |
|
term = *ptr++; |
| 1097 |
|
if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN; |
| 1098 |
|
thisname = ptr; |
| 1099 |
|
while (*ptr != term) ptr++; |
| 1100 |
|
if (name != NULL && lorn == ptr - thisname && |
| 1101 |
|
strncmp((const char *)name, (const char *)thisname, lorn) == 0) |
| 1102 |
|
return *count; |
| 1103 |
|
} |
| 1104 |
|
} |
| 1105 |
|
} |
| 1106 |
|
|
| 1107 |
|
/* Past any initial parenthesis handling, scan for parentheses or vertical |
| 1108 |
|
bars. */ |
| 1109 |
|
|
| 1110 |
|
for (; *ptr != 0; ptr++) |
| 1111 |
|
{ |
| 1112 |
/* Skip over backslashed characters and also entire \Q...\E */ |
/* Skip over backslashed characters and also entire \Q...\E */ |
| 1113 |
|
|
| 1114 |
if (*ptr == CHAR_BACKSLASH) |
if (*ptr == CHAR_BACKSLASH) |
| 1115 |
{ |
{ |
| 1116 |
if (*(++ptr) == 0) return -1; |
if (*(++ptr) == 0) goto FAIL_EXIT; |
| 1117 |
if (*ptr == CHAR_Q) for (;;) |
if (*ptr == CHAR_Q) for (;;) |
| 1118 |
{ |
{ |
| 1119 |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
| 1120 |
if (*ptr == 0) return -1; |
if (*ptr == 0) goto FAIL_EXIT; |
| 1121 |
if (*(++ptr) == CHAR_E) break; |
if (*(++ptr) == CHAR_E) break; |
| 1122 |
} |
} |
| 1123 |
continue; |
continue; |
| 1162 |
if (*ptr == 0) return -1; |
if (*ptr == 0) return -1; |
| 1163 |
if (*ptr == CHAR_BACKSLASH) |
if (*ptr == CHAR_BACKSLASH) |
| 1164 |
{ |
{ |
| 1165 |
if (*(++ptr) == 0) return -1; |
if (*(++ptr) == 0) goto FAIL_EXIT; |
| 1166 |
if (*ptr == CHAR_Q) for (;;) |
if (*ptr == CHAR_Q) for (;;) |
| 1167 |
{ |
{ |
| 1168 |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
while (*(++ptr) != 0 && *ptr != CHAR_BACKSLASH) {}; |
| 1169 |
if (*ptr == 0) return -1; |
if (*ptr == 0) goto FAIL_EXIT; |
| 1170 |
if (*(++ptr) == CHAR_E) break; |
if (*(++ptr) == CHAR_E) break; |
| 1171 |
} |
} |
| 1172 |
continue; |
continue; |
| 1180 |
if (xmode && *ptr == CHAR_NUMBER_SIGN) |
if (xmode && *ptr == CHAR_NUMBER_SIGN) |
| 1181 |
{ |
{ |
| 1182 |
while (*(++ptr) != 0 && *ptr != CHAR_NL) {}; |
while (*(++ptr) != 0 && *ptr != CHAR_NL) {}; |
| 1183 |
if (*ptr == 0) return -1; |
if (*ptr == 0) goto FAIL_EXIT; |
| 1184 |
continue; |
continue; |
| 1185 |
} |
} |
| 1186 |
|
|
| 1187 |
/* An opening parens must now be a real metacharacter */ |
/* Check for the special metacharacters */ |
| 1188 |
|
|
| 1189 |
if (*ptr != CHAR_LEFT_PARENTHESIS) continue; |
if (*ptr == CHAR_LEFT_PARENTHESIS) |
|
if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK) |
|
| 1190 |
{ |
{ |
| 1191 |
count++; |
int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count); |
| 1192 |
if (name == NULL && count == lorn) return count; |
if (rc > 0) return rc; |
| 1193 |
continue; |
if (*ptr == 0) goto FAIL_EXIT; |
| 1194 |
|
} |
| 1195 |
|
|
| 1196 |
|
else if (*ptr == CHAR_RIGHT_PARENTHESIS) |
| 1197 |
|
{ |
| 1198 |
|
if (dup_parens && *count < hwm_count) *count = hwm_count; |
| 1199 |
|
*ptrptr = ptr; |
| 1200 |
|
return -1; |
| 1201 |
} |
} |
| 1202 |
|
|
| 1203 |
|
else if (*ptr == CHAR_VERTICAL_LINE && dup_parens) |
| 1204 |
|
{ |
| 1205 |
|
if (*count > hwm_count) hwm_count = *count; |
| 1206 |
|
*count = start_count; |
| 1207 |
|
} |
| 1208 |
|
} |
| 1209 |
|
|
| 1210 |
ptr += 2; |
FAIL_EXIT: |
| 1211 |
if (*ptr == CHAR_P) ptr++; /* Allow optional P */ |
*ptrptr = ptr; |
| 1212 |
|
return -1; |
| 1213 |
|
} |
| 1214 |
|
|
|
/* We have to disambiguate (?<! and (?<= from (?<name> */ |
|
| 1215 |
|
|
|
if ((*ptr != CHAR_LESS_THAN_SIGN || ptr[1] == CHAR_EXCLAMATION_MARK || |
|
|
ptr[1] == CHAR_EQUALS_SIGN) && *ptr != CHAR_APOSTROPHE) |
|
|
continue; |
|
| 1216 |
|
|
|
count++; |
|
| 1217 |
|
|
| 1218 |
if (name == NULL && count == lorn) return count; |
/************************************************* |
| 1219 |
term = *ptr++; |
* Find forward referenced subpattern * |
| 1220 |
if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN; |
*************************************************/ |
|
thisname = ptr; |
|
|
while (*ptr != term) ptr++; |
|
|
if (name != NULL && lorn == ptr - thisname && |
|
|
strncmp((const char *)name, (const char *)thisname, lorn) == 0) |
|
|
return count; |
|
|
} |
|
| 1221 |
|
|
| 1222 |
return -1; |
/* This function scans along a pattern's text looking for capturing |
| 1223 |
|
subpatterns, and counting them. If it finds a named pattern that matches the |
| 1224 |
|
name it is given, it returns its number. Alternatively, if the name is NULL, it |
| 1225 |
|
returns when it reaches a given numbered subpattern. This is used for forward |
| 1226 |
|
references to subpatterns. We used to be able to start this scan from the |
| 1227 |
|
current compiling point, using the current count value from cd->bracount, and |
| 1228 |
|
do it all in a single loop, but the addition of the possibility of duplicate |
| 1229 |
|
subpattern numbers means that we have to scan from the very start, in order to |
| 1230 |
|
take account of such duplicates, and to use a recursive function to keep track |
| 1231 |
|
of the different types of group. |
| 1232 |
|
|
| 1233 |
|
Arguments: |
| 1234 |
|
cd compile background data |
| 1235 |
|
name name to seek, or NULL if seeking a numbered subpattern |
| 1236 |
|
lorn name length, or subpattern number if name is NULL |
| 1237 |
|
xmode TRUE if we are in /x mode |
| 1238 |
|
|
| 1239 |
|
Returns: the number of the found subpattern, or -1 if not found |
| 1240 |
|
*/ |
| 1241 |
|
|
| 1242 |
|
static int |
| 1243 |
|
find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode) |
| 1244 |
|
{ |
| 1245 |
|
uschar *ptr = (uschar *)cd->start_pattern; |
| 1246 |
|
int count = 0; |
| 1247 |
|
int rc; |
| 1248 |
|
|
| 1249 |
|
/* If the pattern does not start with an opening parenthesis, the first call |
| 1250 |
|
to find_parens_sub() will scan right to the end (if necessary). However, if it |
| 1251 |
|
does start with a parenthesis, find_parens_sub() will return when it hits the |
| 1252 |
|
matching closing parens. That is why we have to have a loop. */ |
| 1253 |
|
|
| 1254 |
|
for (;;) |
| 1255 |
|
{ |
| 1256 |
|
rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count); |
| 1257 |
|
if (rc > 0 || *ptr++ == 0) break; |
| 1258 |
|
} |
| 1259 |
|
|
| 1260 |
|
return rc; |
| 1261 |
} |
} |
| 1262 |
|
|
| 1263 |
|
|
| 1264 |
|
|
| 1265 |
|
|
| 1266 |
/************************************************* |
/************************************************* |
| 1267 |
* Find first significant op code * |
* Find first significant op code * |
| 1268 |
*************************************************/ |
*************************************************/ |
| 4601 |
|
|
| 4602 |
/* Search the pattern for a forward reference */ |
/* Search the pattern for a forward reference */ |
| 4603 |
|
|
| 4604 |
else if ((i = find_parens(ptr, cd, name, namelen, |
else if ((i = find_parens(cd, name, namelen, |
| 4605 |
(options & PCRE_EXTENDED) != 0)) > 0) |
(options & PCRE_EXTENDED) != 0)) > 0) |
| 4606 |
{ |
{ |
| 4607 |
PUT2(code, 2+LINK_SIZE, i); |
PUT2(code, 2+LINK_SIZE, i); |
| 4900 |
recno = GET2(slot, 0); |
recno = GET2(slot, 0); |
| 4901 |
} |
} |
| 4902 |
else if ((recno = /* Forward back reference */ |
else if ((recno = /* Forward back reference */ |
| 4903 |
find_parens(ptr, cd, name, namelen, |
find_parens(cd, name, namelen, |
| 4904 |
(options & PCRE_EXTENDED) != 0)) <= 0) |
(options & PCRE_EXTENDED) != 0)) <= 0) |
| 4905 |
{ |
{ |
| 4906 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
| 5010 |
|
|
| 5011 |
if (called == NULL) |
if (called == NULL) |
| 5012 |
{ |
{ |
| 5013 |
if (find_parens(ptr, cd, NULL, recno, |
if (find_parens(cd, NULL, recno, |
| 5014 |
(options & PCRE_EXTENDED) != 0) < 0) |
(options & PCRE_EXTENDED) != 0) < 0) |
| 5015 |
{ |
{ |
| 5016 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |