| 408 |
"different names for subpatterns of the same number are not allowed\0" |
"different names for subpatterns of the same number are not allowed\0" |
| 409 |
"(*MARK) must have an argument\0" |
"(*MARK) must have an argument\0" |
| 410 |
"this version of PCRE is not compiled with PCRE_UCP support\0" |
"this version of PCRE is not compiled with PCRE_UCP support\0" |
| 411 |
|
"\\c must be followed by an ASCII character\0" |
| 412 |
; |
; |
| 413 |
|
|
| 414 |
/* Table to identify digits and hex digits. This is used when compiling |
/* Table to identify digits and hex digits. This is used when compiling |
| 842 |
break; |
break; |
| 843 |
|
|
| 844 |
/* For \c, a following letter is upper-cased; then the 0x40 bit is flipped. |
/* For \c, a following letter is upper-cased; then the 0x40 bit is flipped. |
| 845 |
This coding is ASCII-specific, but then the whole concept of \cx is |
An error is given if the byte following \c is not an ASCII character. This |
| 846 |
|
coding is ASCII-specific, but then the whole concept of \cx is |
| 847 |
ASCII-specific. (However, an EBCDIC equivalent has now been added.) */ |
ASCII-specific. (However, an EBCDIC equivalent has now been added.) */ |
| 848 |
|
|
| 849 |
case CHAR_c: |
case CHAR_c: |
| 853 |
*errorcodeptr = ERR2; |
*errorcodeptr = ERR2; |
| 854 |
break; |
break; |
| 855 |
} |
} |
| 856 |
|
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
| 857 |
#ifndef EBCDIC /* ASCII/UTF-8 coding */ |
if (c > 127) /* Excludes all non-ASCII in either mode */ |
| 858 |
|
{ |
| 859 |
|
*errorcodeptr = ERR68; |
| 860 |
|
break; |
| 861 |
|
} |
| 862 |
if (c >= CHAR_a && c <= CHAR_z) c -= 32; |
if (c >= CHAR_a && c <= CHAR_z) c -= 32; |
| 863 |
c ^= 0x40; |
c ^= 0x40; |
| 864 |
#else /* EBCDIC coding */ |
#else /* EBCDIC coding */ |
| 865 |
if (c >= CHAR_a && c <= CHAR_z) c += 64; |
if (c >= CHAR_a && c <= CHAR_z) c += 64; |
| 866 |
c ^= 0xC0; |
c ^= 0xC0; |
| 867 |
#endif |
#endif |
| 1105 |
start at a parenthesis. It scans along a pattern's text looking for capturing |
start at a parenthesis. It scans along a pattern's text looking for capturing |
| 1106 |
subpatterns, and counting them. If it finds a named pattern that matches the |
subpatterns, and counting them. If it finds a named pattern that matches the |
| 1107 |
name it is given, it returns its number. Alternatively, if the name is NULL, it |
name it is given, it returns its number. Alternatively, if the name is NULL, it |
| 1108 |
returns when it reaches a given numbered subpattern. We know that if (?P< is |
returns when it reaches a given numbered subpattern. Recursion is used to keep |
| 1109 |
encountered, the name will be terminated by '>' because that is checked in the |
track of subpatterns that reset the capturing group numbers - the (?| feature. |
| 1110 |
first pass. Recursion is used to keep track of subpatterns that reset the |
|
| 1111 |
capturing group numbers - the (?| feature. |
This function was originally called only from the second pass, in which we know |
| 1112 |
|
that if (?< or (?' or (?P< is encountered, the name will be correctly |
| 1113 |
|
terminated because that is checked in the first pass. There is now one call to |
| 1114 |
|
this function in the first pass, to check for a recursive back reference by |
| 1115 |
|
name (so that we can make the whole group atomic). In this case, we need check |
| 1116 |
|
only up to the current position in the pattern, and that is still OK because |
| 1117 |
|
any previous occurrences will have been checked. To make this work, the test |
| 1118 |
|
for "end of pattern" is a check against cd->end_pattern in the main loop, |
| 1119 |
|
instead of looking for a binary zero. This means that the special first-pass |
| 1120 |
|
call can adjust cd->end_pattern temporarily. (Checks for binary zero while |
| 1121 |
|
processing items within the loop are OK, because afterwards the main loop will |
| 1122 |
|
terminate.) |
| 1123 |
|
|
| 1124 |
Arguments: |
Arguments: |
| 1125 |
ptrptr address of the current character pointer (updated) |
ptrptr address of the current character pointer (updated) |
| 1127 |
name name to seek, or NULL if seeking a numbered subpattern |
name name to seek, or NULL if seeking a numbered subpattern |
| 1128 |
lorn name length, or subpattern number if name is NULL |
lorn name length, or subpattern number if name is NULL |
| 1129 |
xmode TRUE if we are in /x mode |
xmode TRUE if we are in /x mode |
| 1130 |
|
utf8 TRUE if we are in UTF-8 mode |
| 1131 |
count pointer to the current capturing subpattern number (updated) |
count pointer to the current capturing subpattern number (updated) |
| 1132 |
|
|
| 1133 |
Returns: the number of the named subpattern, or -1 if not found |
Returns: the number of the named subpattern, or -1 if not found |
| 1135 |
|
|
| 1136 |
static int |
static int |
| 1137 |
find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn, |
find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn, |
| 1138 |
BOOL xmode, int *count) |
BOOL xmode, BOOL utf8, int *count) |
| 1139 |
{ |
{ |
| 1140 |
uschar *ptr = *ptrptr; |
uschar *ptr = *ptrptr; |
| 1141 |
int start_count = *count; |
int start_count = *count; |
| 1220 |
} |
} |
| 1221 |
|
|
| 1222 |
/* Past any initial parenthesis handling, scan for parentheses or vertical |
/* Past any initial parenthesis handling, scan for parentheses or vertical |
| 1223 |
bars. */ |
bars. Stop if we get to cd->end_pattern. Note that this is important for the |
| 1224 |
|
first-pass call when this value is temporarily adjusted to stop at the current |
| 1225 |
|
position. So DO NOT change this to a test for binary zero. */ |
| 1226 |
|
|
| 1227 |
for (; *ptr != 0; ptr++) |
for (; ptr < cd->end_pattern; ptr++) |
| 1228 |
{ |
{ |
| 1229 |
/* Skip over backslashed characters and also entire \Q...\E */ |
/* Skip over backslashed characters and also entire \Q...\E */ |
| 1230 |
|
|
| 1298 |
|
|
| 1299 |
if (xmode && *ptr == CHAR_NUMBER_SIGN) |
if (xmode && *ptr == CHAR_NUMBER_SIGN) |
| 1300 |
{ |
{ |
| 1301 |
while (*(++ptr) != 0 && *ptr != CHAR_NL) {}; |
ptr++; |
| 1302 |
|
while (*ptr != 0) |
| 1303 |
|
{ |
| 1304 |
|
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
| 1305 |
|
ptr++; |
| 1306 |
|
#ifdef SUPPORT_UTF8 |
| 1307 |
|
if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++; |
| 1308 |
|
#endif |
| 1309 |
|
} |
| 1310 |
if (*ptr == 0) goto FAIL_EXIT; |
if (*ptr == 0) goto FAIL_EXIT; |
| 1311 |
continue; |
continue; |
| 1312 |
} |
} |
| 1315 |
|
|
| 1316 |
if (*ptr == CHAR_LEFT_PARENTHESIS) |
if (*ptr == CHAR_LEFT_PARENTHESIS) |
| 1317 |
{ |
{ |
| 1318 |
int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count); |
int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, count); |
| 1319 |
if (rc > 0) return rc; |
if (rc > 0) return rc; |
| 1320 |
if (*ptr == 0) goto FAIL_EXIT; |
if (*ptr == 0) goto FAIL_EXIT; |
| 1321 |
} |
} |
| 1361 |
name name to seek, or NULL if seeking a numbered subpattern |
name name to seek, or NULL if seeking a numbered subpattern |
| 1362 |
lorn name length, or subpattern number if name is NULL |
lorn name length, or subpattern number if name is NULL |
| 1363 |
xmode TRUE if we are in /x mode |
xmode TRUE if we are in /x mode |
| 1364 |
|
utf8 TRUE if we are in UTF-8 mode |
| 1365 |
|
|
| 1366 |
Returns: the number of the found subpattern, or -1 if not found |
Returns: the number of the found subpattern, or -1 if not found |
| 1367 |
*/ |
*/ |
| 1368 |
|
|
| 1369 |
static int |
static int |
| 1370 |
find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode) |
find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode, |
| 1371 |
|
BOOL utf8) |
| 1372 |
{ |
{ |
| 1373 |
uschar *ptr = (uschar *)cd->start_pattern; |
uschar *ptr = (uschar *)cd->start_pattern; |
| 1374 |
int count = 0; |
int count = 0; |
| 1381 |
|
|
| 1382 |
for (;;) |
for (;;) |
| 1383 |
{ |
{ |
| 1384 |
rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count); |
rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, &count); |
| 1385 |
if (rc > 0 || *ptr++ == 0) break; |
if (rc > 0 || *ptr++ == 0) break; |
| 1386 |
} |
} |
| 1387 |
|
|
| 2545 |
while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; |
while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; |
| 2546 |
if (*ptr == CHAR_NUMBER_SIGN) |
if (*ptr == CHAR_NUMBER_SIGN) |
| 2547 |
{ |
{ |
| 2548 |
while (*(++ptr) != 0) |
ptr++; |
| 2549 |
|
while (*ptr != 0) |
| 2550 |
|
{ |
| 2551 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
| 2552 |
|
ptr++; |
| 2553 |
|
#ifdef SUPPORT_UTF8 |
| 2554 |
|
if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++; |
| 2555 |
|
#endif |
| 2556 |
|
} |
| 2557 |
} |
} |
| 2558 |
else break; |
else break; |
| 2559 |
} |
} |
| 2589 |
while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; |
while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++; |
| 2590 |
if (*ptr == CHAR_NUMBER_SIGN) |
if (*ptr == CHAR_NUMBER_SIGN) |
| 2591 |
{ |
{ |
| 2592 |
while (*(++ptr) != 0) |
ptr++; |
| 2593 |
|
while (*ptr != 0) |
| 2594 |
|
{ |
| 2595 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; } |
| 2596 |
|
ptr++; |
| 2597 |
|
#ifdef SUPPORT_UTF8 |
| 2598 |
|
if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++; |
| 2599 |
|
#endif |
| 2600 |
|
} |
| 2601 |
} |
} |
| 2602 |
else break; |
else break; |
| 2603 |
} |
} |
| 3170 |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
| 3171 |
if (c == CHAR_NUMBER_SIGN) |
if (c == CHAR_NUMBER_SIGN) |
| 3172 |
{ |
{ |
| 3173 |
while (*(++ptr) != 0) |
ptr++; |
| 3174 |
|
while (*ptr != 0) |
| 3175 |
{ |
{ |
| 3176 |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; } |
| 3177 |
|
ptr++; |
| 3178 |
|
#ifdef SUPPORT_UTF8 |
| 3179 |
|
if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++; |
| 3180 |
|
#endif |
| 3181 |
} |
} |
| 3182 |
if (*ptr != 0) continue; |
if (*ptr != 0) continue; |
| 3183 |
|
|
| 5085 |
/* Search the pattern for a forward reference */ |
/* Search the pattern for a forward reference */ |
| 5086 |
|
|
| 5087 |
else if ((i = find_parens(cd, name, namelen, |
else if ((i = find_parens(cd, name, namelen, |
| 5088 |
(options & PCRE_EXTENDED) != 0)) > 0) |
(options & PCRE_EXTENDED) != 0, utf8)) > 0) |
| 5089 |
{ |
{ |
| 5090 |
PUT2(code, 2+LINK_SIZE, i); |
PUT2(code, 2+LINK_SIZE, i); |
| 5091 |
code[1+LINK_SIZE]++; |
code[1+LINK_SIZE]++; |
| 5386 |
while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++; |
while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++; |
| 5387 |
namelen = (int)(ptr - name); |
namelen = (int)(ptr - name); |
| 5388 |
|
|
| 5389 |
/* In the pre-compile phase, do a syntax check and set a dummy |
/* In the pre-compile phase, do a syntax check. We used to just set |
| 5390 |
reference number. */ |
a dummy reference number, because it was not used in the first pass. |
| 5391 |
|
However, with the change of recursive back references to be atomic, |
| 5392 |
|
we have to look for the number so that this state can be identified, as |
| 5393 |
|
otherwise the incorrect length is computed. If it's not a backwards |
| 5394 |
|
reference, the dummy number will do. */ |
| 5395 |
|
|
| 5396 |
if (lengthptr != NULL) |
if (lengthptr != NULL) |
| 5397 |
{ |
{ |
| 5398 |
|
const uschar *temp; |
| 5399 |
|
|
| 5400 |
if (namelen == 0) |
if (namelen == 0) |
| 5401 |
{ |
{ |
| 5402 |
*errorcodeptr = ERR62; |
*errorcodeptr = ERR62; |
| 5412 |
*errorcodeptr = ERR48; |
*errorcodeptr = ERR48; |
| 5413 |
goto FAILED; |
goto FAILED; |
| 5414 |
} |
} |
| 5415 |
recno = 0; |
|
| 5416 |
|
/* The name table does not exist in the first pass, so we cannot |
| 5417 |
|
do a simple search as in the code below. Instead, we have to scan the |
| 5418 |
|
pattern to find the number. It is important that we scan it only as |
| 5419 |
|
far as we have got because the syntax of named subpatterns has not |
| 5420 |
|
been checked for the rest of the pattern, and find_parens() assumes |
| 5421 |
|
correct syntax. In any case, it's a waste of resources to scan |
| 5422 |
|
further. We stop the scan at the current point by temporarily |
| 5423 |
|
adjusting the value of cd->end_pattern. */ |
| 5424 |
|
|
| 5425 |
|
temp = cd->end_pattern; |
| 5426 |
|
cd->end_pattern = ptr; |
| 5427 |
|
recno = find_parens(cd, name, namelen, |
| 5428 |
|
(options & PCRE_EXTENDED) != 0, utf8); |
| 5429 |
|
cd->end_pattern = temp; |
| 5430 |
|
if (recno < 0) recno = 0; /* Forward ref; set dummy number */ |
| 5431 |
} |
} |
| 5432 |
|
|
| 5433 |
/* In the real compile, seek the name in the table. We check the name |
/* In the real compile, seek the name in the table. We check the name |
| 5452 |
} |
} |
| 5453 |
else if ((recno = /* Forward back reference */ |
else if ((recno = /* Forward back reference */ |
| 5454 |
find_parens(cd, name, namelen, |
find_parens(cd, name, namelen, |
| 5455 |
(options & PCRE_EXTENDED) != 0)) <= 0) |
(options & PCRE_EXTENDED) != 0, utf8)) <= 0) |
| 5456 |
{ |
{ |
| 5457 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
| 5458 |
goto FAILED; |
goto FAILED; |
| 5563 |
if (called == NULL) |
if (called == NULL) |
| 5564 |
{ |
{ |
| 5565 |
if (find_parens(cd, NULL, recno, |
if (find_parens(cd, NULL, recno, |
| 5566 |
(options & PCRE_EXTENDED) != 0) < 0) |
(options & PCRE_EXTENDED) != 0, utf8) < 0) |
| 5567 |
{ |
{ |
| 5568 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
| 5569 |
goto FAILED; |
goto FAILED; |
| 6893 |
{ skipatstart += 7; options |= PCRE_UTF8; continue; } |
{ skipatstart += 7; options |= PCRE_UTF8; continue; } |
| 6894 |
else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0) |
else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0) |
| 6895 |
{ skipatstart += 6; options |= PCRE_UCP; continue; } |
{ skipatstart += 6; options |= PCRE_UCP; continue; } |
| 6896 |
|
else if (strncmp((char *)(ptr+skipatstart+2), STRING_NO_START_OPT_RIGHTPAR, 13) == 0) |
| 6897 |
|
{ skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; } |
| 6898 |
|
|
| 6899 |
if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0) |
if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0) |
| 6900 |
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; } |
{ skipatstart += 5; newnl = PCRE_NEWLINE_CR; } |