| 303 |
"number is too big\0" |
"number is too big\0" |
| 304 |
"subpattern name expected\0" |
"subpattern name expected\0" |
| 305 |
"digit expected after (?+\0" |
"digit expected after (?+\0" |
| 306 |
"] is an invalid data character in JavaScript compatibility mode"; |
"] is an invalid data character in JavaScript compatibility mode"; |
| 307 |
|
|
| 308 |
|
|
| 309 |
/* Table to identify digits and hex digits. This is used when compiling |
/* Table to identify digits and hex digits. This is used when compiling |
| 533 |
break; |
break; |
| 534 |
|
|
| 535 |
/* \g must be followed by one of a number of specific things: |
/* \g must be followed by one of a number of specific things: |
| 536 |
|
|
| 537 |
(1) A number, either plain or braced. If positive, it is an absolute |
(1) A number, either plain or braced. If positive, it is an absolute |
| 538 |
backreference. If negative, it is a relative backreference. This is a Perl |
backreference. If negative, it is a relative backreference. This is a Perl |
| 539 |
5.10 feature. |
5.10 feature. |
| 540 |
|
|
| 541 |
(2) Perl 5.10 also supports \g{name} as a reference to a named group. This |
(2) Perl 5.10 also supports \g{name} as a reference to a named group. This |
| 542 |
is part of Perl's movement towards a unified syntax for back references. As |
is part of Perl's movement towards a unified syntax for back references. As |
| 543 |
this is synonymous with \k{name}, we fudge it up by pretending it really |
this is synonymous with \k{name}, we fudge it up by pretending it really |
| 544 |
was \k. |
was \k. |
| 545 |
|
|
| 546 |
(3) For Oniguruma compatibility we also support \g followed by a name or a |
(3) For Oniguruma compatibility we also support \g followed by a name or a |
| 547 |
number either in angle brackets or in single quotes. However, these are |
number either in angle brackets or in single quotes. However, these are |
| 548 |
(possibly recursive) subroutine calls, _not_ backreferences. Just return |
(possibly recursive) subroutine calls, _not_ backreferences. Just return |
| 549 |
the -ESC_g code (cf \k). */ |
the -ESC_g code (cf \k). */ |
| 550 |
|
|
| 551 |
case 'g': |
case 'g': |
| 552 |
if (ptr[1] == '<' || ptr[1] == '\'') |
if (ptr[1] == '<' || ptr[1] == '\'') |
| 553 |
{ |
{ |
| 554 |
c = -ESC_g; |
c = -ESC_g; |
| 555 |
break; |
break; |
| 556 |
} |
} |
| 557 |
|
|
| 558 |
/* Handle the Perl-compatible cases */ |
/* Handle the Perl-compatible cases */ |
| 559 |
|
|
| 560 |
if (ptr[1] == '{') |
if (ptr[1] == '{') |
| 561 |
{ |
{ |
| 562 |
const uschar *p; |
const uschar *p; |
| 588 |
*errorcodeptr = ERR61; |
*errorcodeptr = ERR61; |
| 589 |
break; |
break; |
| 590 |
} |
} |
| 591 |
|
|
| 592 |
if (braced && *(++ptr) != '}') |
if (braced && *(++ptr) != '}') |
| 593 |
{ |
{ |
| 594 |
*errorcodeptr = ERR57; |
*errorcodeptr = ERR57; |
| 595 |
break; |
break; |
| 596 |
} |
} |
| 597 |
|
|
| 598 |
if (c == 0) |
if (c == 0) |
| 599 |
{ |
{ |
| 600 |
*errorcodeptr = ERR58; |
*errorcodeptr = ERR58; |
| 601 |
break; |
break; |
| 602 |
} |
} |
| 603 |
|
|
| 604 |
if (negated) |
if (negated) |
| 605 |
{ |
{ |
| 976 |
|
|
| 977 |
Arguments: |
Arguments: |
| 978 |
ptr current position in the pattern |
ptr current position in the pattern |
| 979 |
cd compile background data |
cd compile background data |
| 980 |
name name to seek, or NULL if seeking a numbered subpattern |
name name to seek, or NULL if seeking a numbered subpattern |
| 981 |
lorn name length, or subpattern number if name is NULL |
lorn name length, or subpattern number if name is NULL |
| 982 |
xmode TRUE if we are in /x mode |
xmode TRUE if we are in /x mode |
| 1033 |
|
|
| 1034 |
/* If the next character is ']', it is a data character that must be |
/* If the next character is ']', it is a data character that must be |
| 1035 |
skipped, except in JavaScript compatibility mode. */ |
skipped, except in JavaScript compatibility mode. */ |
| 1036 |
|
|
| 1037 |
if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0) |
if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0) |
| 1038 |
ptr++; |
ptr++; |
| 1039 |
|
|
| 1040 |
while (*(++ptr) != ']') |
while (*(++ptr) != ']') |
| 1041 |
{ |
{ |
| 1042 |
if (*ptr == 0) return -1; |
if (*ptr == 0) return -1; |
| 1680 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 1681 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 1682 |
case OP_ANY: |
case OP_ANY: |
| 1683 |
case OP_ALLANY: |
case OP_ALLANY: |
| 1684 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 1685 |
case OP_CHAR: |
case OP_CHAR: |
| 1686 |
case OP_CHARNC: |
case OP_CHARNC: |
| 1911 |
|
|
| 1912 |
/* See if this recursion is on the forward reference list. If so, adjust the |
/* See if this recursion is on the forward reference list. If so, adjust the |
| 1913 |
reference. */ |
reference. */ |
| 1914 |
|
|
| 1915 |
for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE) |
for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE) |
| 1916 |
{ |
{ |
| 1917 |
offset = GET(hc, 0); |
offset = GET(hc, 0); |
| 2487 |
/* Get next byte in the pattern */ |
/* Get next byte in the pattern */ |
| 2488 |
|
|
| 2489 |
c = *ptr; |
c = *ptr; |
| 2490 |
|
|
| 2491 |
/* If we are in the pre-compile phase, accumulate the length used for the |
/* If we are in the pre-compile phase, accumulate the length used for the |
| 2492 |
previous cycle of this loop. */ |
previous cycle of this loop. */ |
| 2493 |
|
|
| 2682 |
opcode is compiled. It may optionally have a bit map for characters < 256, |
opcode is compiled. It may optionally have a bit map for characters < 256, |
| 2683 |
but those above are explicitly listed afterwards. A flag byte tells |
but those above are explicitly listed afterwards. A flag byte tells |
| 2684 |
whether the bitmap is present, and whether this is a negated class or not. |
whether the bitmap is present, and whether this is a negated class or not. |
| 2685 |
|
|
| 2686 |
In JavaScript compatibility mode, an isolated ']' causes an error. In |
In JavaScript compatibility mode, an isolated ']' causes an error. In |
| 2687 |
default (Perl) mode, it is treated as a data character. */ |
default (Perl) mode, it is treated as a data character. */ |
| 2688 |
|
|
| 2689 |
case ']': |
case ']': |
| 2690 |
if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
| 2691 |
{ |
{ |
| 2692 |
*errorcodeptr = ERR64; |
*errorcodeptr = ERR64; |
| 2693 |
goto FAILED; |
goto FAILED; |
| 2694 |
} |
} |
| 2695 |
goto NORMAL_CHAR; |
goto NORMAL_CHAR; |
| 2696 |
|
|
| 2697 |
case '[': |
case '[': |
| 2698 |
previous = code; |
previous = code; |
| 2725 |
negate_class = TRUE; |
negate_class = TRUE; |
| 2726 |
else break; |
else break; |
| 2727 |
} |
} |
| 2728 |
|
|
| 2729 |
/* Empty classes are allowed in JavaScript compatibility mode. Otherwise, |
/* Empty classes are allowed in JavaScript compatibility mode. Otherwise, |
| 2730 |
an initial ']' is taken as a data character -- the code below handles |
an initial ']' is taken as a data character -- the code below handles |
| 2731 |
that. In JS mode, [] must always fail, so generate OP_FAIL, whereas |
that. In JS mode, [] must always fail, so generate OP_FAIL, whereas |
| 2732 |
[^] must match any character, so generate OP_ALLANY. */ |
[^] must match any character, so generate OP_ALLANY. */ |
| 2733 |
|
|
| 2734 |
if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
| 2735 |
{ |
{ |
| 2736 |
*code++ = negate_class? OP_ALLANY : OP_FAIL; |
*code++ = negate_class? OP_ALLANY : OP_FAIL; |
| 2737 |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
| 2738 |
zerofirstbyte = firstbyte; |
zerofirstbyte = firstbyte; |
| 2739 |
break; |
break; |
| 2740 |
} |
} |
| 2741 |
|
|
| 2742 |
/* If a class contains a negative special such as \S, we need to flip the |
/* If a class contains a negative special such as \S, we need to flip the |
| 2743 |
negation flag at the end, so that support for characters > 255 works |
negation flag at the end, so that support for characters > 255 works |
| 3902 |
** code = previous; |
** code = previous; |
| 3903 |
** goto END_REPEAT; |
** goto END_REPEAT; |
| 3904 |
** } |
** } |
| 3905 |
|
|
| 3906 |
However, that fails when a group is referenced as a subroutine from |
However, that fails when a group is referenced as a subroutine from |
| 3907 |
elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it |
elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it |
| 3908 |
so that it is skipped on execution. As we don't have a list of which |
so that it is skipped on execution. As we don't have a list of which |
| 3909 |
groups are referenced, we cannot do this selectively. |
groups are referenced, we cannot do this selectively. |
| 3910 |
|
|
| 3911 |
If the maximum is 1 or unlimited, we just have to stick in the BRAZERO |
If the maximum is 1 or unlimited, we just have to stick in the BRAZERO |
| 3912 |
and do no more at this point. However, we do need to adjust any |
and do no more at this point. However, we do need to adjust any |
| 3925 |
{ |
{ |
| 3926 |
*previous++ = OP_SKIPZERO; |
*previous++ = OP_SKIPZERO; |
| 3927 |
goto END_REPEAT; |
goto END_REPEAT; |
| 3928 |
} |
} |
| 3929 |
*previous++ = OP_BRAZERO + repeat_type; |
*previous++ = OP_BRAZERO + repeat_type; |
| 3930 |
} |
} |
| 3931 |
|
|
| 4119 |
} |
} |
| 4120 |
} |
} |
| 4121 |
} |
} |
| 4122 |
|
|
| 4123 |
/* If previous is OP_FAIL, it was generated by an empty class [] in |
/* If previous is OP_FAIL, it was generated by an empty class [] in |
| 4124 |
JavaScript mode. The other ways in which OP_FAIL can be generated, that is |
JavaScript mode. The other ways in which OP_FAIL can be generated, that is |
| 4125 |
by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat" |
by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat" |
| 4126 |
error above. We can just ignore the repeat in the JS case. */ |
error above. We can just ignore the repeat in the JS case. */ |
| 4127 |
|
|
| 4128 |
else if (*previous == OP_FAIL) goto END_REPEAT; |
else if (*previous == OP_FAIL) goto END_REPEAT; |
| 4129 |
|
|
| 4130 |
/* Else there's some kind of shambles */ |
/* Else there's some kind of shambles */ |
| 4131 |
|
|
| 4207 |
bravalue = OP_CBRA; |
bravalue = OP_CBRA; |
| 4208 |
save_hwm = cd->hwm; |
save_hwm = cd->hwm; |
| 4209 |
reset_bracount = FALSE; |
reset_bracount = FALSE; |
| 4210 |
|
|
| 4211 |
/* First deal with various "verbs" that can be introduced by '*'. */ |
/* First deal with various "verbs" that can be introduced by '*'. */ |
| 4212 |
|
|
| 4213 |
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) |
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) |
| 4738 |
{ |
{ |
| 4739 |
const uschar *called; |
const uschar *called; |
| 4740 |
terminator = ')'; |
terminator = ')'; |
| 4741 |
|
|
| 4742 |
/* Come here from the \g<...> and \g'...' code (Oniguruma |
/* Come here from the \g<...> and \g'...' code (Oniguruma |
| 4743 |
compatibility). However, the syntax has been checked to ensure that |
compatibility). However, the syntax has been checked to ensure that |
| 4744 |
the ... are a (signed) number, so that neither ERR63 nor ERR29 will |
the ... are a (signed) number, so that neither ERR63 nor ERR29 will |
| 4745 |
be called on this path, nor will the jump to OTHER_CHAR_AFTER_QUERY |
be called on this path, nor will the jump to OTHER_CHAR_AFTER_QUERY |
| 4746 |
ever be taken. */ |
ever be taken. */ |
| 4747 |
|
|
| 4748 |
HANDLE_NUMERICAL_RECURSION: |
HANDLE_NUMERICAL_RECURSION: |
| 4749 |
|
|
| 4750 |
if ((refsign = *ptr) == '+') |
if ((refsign = *ptr) == '+') |
| 4751 |
{ |
{ |
| 5163 |
back references and those types that consume a character may be repeated. |
back references and those types that consume a character may be repeated. |
| 5164 |
We can test for values between ESC_b and ESC_Z for the latter; this may |
We can test for values between ESC_b and ESC_Z for the latter; this may |
| 5165 |
have to change if any new ones are ever created. */ |
have to change if any new ones are ever created. */ |
| 5166 |
|
|
| 5167 |
case '\\': |
case '\\': |
| 5168 |
tempptr = ptr; |
tempptr = ptr; |
| 5169 |
c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE); |
c = check_escape(&ptr, errorcodeptr, cd->bracount, options, FALSE); |
| 5190 |
|
|
| 5191 |
zerofirstbyte = firstbyte; |
zerofirstbyte = firstbyte; |
| 5192 |
zeroreqbyte = reqbyte; |
zeroreqbyte = reqbyte; |
| 5193 |
|
|
| 5194 |
/* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n' |
/* \g<name> or \g'name' is a subroutine call by name and \g<n> or \g'n' |
| 5195 |
is a subroutine call by number (Oniguruma syntax). In fact, the value |
is a subroutine call by number (Oniguruma syntax). In fact, the value |
| 5196 |
-ESC_g is returned only for these cases. So we don't need to check for < |
-ESC_g is returned only for these cases. So we don't need to check for < |
| 5197 |
or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is |
or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is |
| 5198 |
-ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as |
-ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as |
| 5199 |
that is a synonym for a named back reference). */ |
that is a synonym for a named back reference). */ |
| 5200 |
|
|
| 5201 |
if (-c == ESC_g) |
if (-c == ESC_g) |
| 5202 |
{ |
{ |
| 5203 |
const uschar *p; |
const uschar *p; |
| 5204 |
save_hwm = cd->hwm; /* Normally this is set when '(' is read */ |
save_hwm = cd->hwm; /* Normally this is set when '(' is read */ |
| 5205 |
terminator = (*(++ptr) == '<')? '>' : '\''; |
terminator = (*(++ptr) == '<')? '>' : '\''; |
| 5206 |
|
|
| 5207 |
/* These two statements stop the compiler from warning about possibly |
/* These two statements stop the compiler from warning about possibly |
| 5208 |
unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In |
unset variables caused by the jump to HANDLE_NUMERICAL_RECURSION. In |
| 5209 |
fact, because we actually check for a number below, the paths that |
fact, because we actually check for a number below, the paths that |
| 5210 |
would actually be in error are never taken. */ |
would actually be in error are never taken. */ |
| 5211 |
|
|
| 5212 |
skipbytes = 0; |
skipbytes = 0; |
| 5213 |
reset_bracount = FALSE; |
reset_bracount = FALSE; |
| 5214 |
|
|
| 5215 |
/* Test for a name */ |
/* Test for a name */ |
| 5216 |
|
|
| 5217 |
if (ptr[1] != '+' && ptr[1] != '-') |
if (ptr[1] != '+' && ptr[1] != '-') |
| 5218 |
{ |
{ |
| 5219 |
BOOL isnumber = TRUE; |
BOOL isnumber = TRUE; |
| 5220 |
for (p = ptr + 1; *p != 0 && *p != terminator; p++) |
for (p = ptr + 1; *p != 0 && *p != terminator; p++) |
| 5221 |
{ |
{ |
| 5222 |
if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE; |
if ((cd->ctypes[*p] & ctype_digit) == 0) isnumber = FALSE; |
| 5223 |
if ((cd->ctypes[*p] & ctype_word) == 0) break; |
if ((cd->ctypes[*p] & ctype_word) == 0) break; |
| 5224 |
} |
} |
| 5225 |
if (*p != terminator) |
if (*p != terminator) |
| 5226 |
{ |
{ |
| 5227 |
*errorcodeptr = ERR57; |
*errorcodeptr = ERR57; |
| 5228 |
break; |
break; |
| 5229 |
} |
} |
| 5230 |
if (isnumber) |
if (isnumber) |
| 5231 |
{ |
{ |
| 5232 |
ptr++; |
ptr++; |
| 5233 |
goto HANDLE_NUMERICAL_RECURSION; |
goto HANDLE_NUMERICAL_RECURSION; |
| 5234 |
} |
} |
| 5235 |
is_recurse = TRUE; |
is_recurse = TRUE; |
| 5236 |
goto NAMED_REF_OR_RECURSE; |
goto NAMED_REF_OR_RECURSE; |
| 5237 |
} |
} |
| 5238 |
|
|
| 5239 |
/* Test a signed number in angle brackets or quotes. */ |
/* Test a signed number in angle brackets or quotes. */ |
| 5240 |
|
|
| 5241 |
p = ptr + 2; |
p = ptr + 2; |
| 5242 |
while ((digitab[*p] & ctype_digit) != 0) p++; |
while ((digitab[*p] & ctype_digit) != 0) p++; |
| 5243 |
if (*p != terminator) |
if (*p != terminator) |
| 5245 |
*errorcodeptr = ERR57; |
*errorcodeptr = ERR57; |
| 5246 |
break; |
break; |
| 5247 |
} |
} |
| 5248 |
ptr++; |
ptr++; |
| 5249 |
goto HANDLE_NUMERICAL_RECURSION; |
goto HANDLE_NUMERICAL_RECURSION; |
| 5250 |
} |
} |
| 5251 |
|
|
| 5252 |
/* \k<name> or \k'name' is a back reference by name (Perl syntax). |
/* \k<name> or \k'name' is a back reference by name (Perl syntax). |
| 5253 |
We also support \k{name} (.NET syntax) */ |
We also support \k{name} (.NET syntax) */ |
| 5761 |
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || |
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || |
| 5762 |
op == OP_TYPEPOSSTAR)) |
op == OP_TYPEPOSSTAR)) |
| 5763 |
{ |
{ |
| 5764 |
if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0) |
if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0) |
| 5765 |
return FALSE; |
return FALSE; |
| 5766 |
} |
} |
| 5767 |
|
|
| 6267 |
if (groupptr == NULL) errorcode = ERR53; |
if (groupptr == NULL) errorcode = ERR53; |
| 6268 |
else PUT(((uschar *)codestart), offset, groupptr - codestart); |
else PUT(((uschar *)codestart), offset, groupptr - codestart); |
| 6269 |
} |
} |
| 6270 |
|
|
| 6271 |
/* Give an error if there's a back reference to a non-existent capturing |
/* Give an error if there's a back reference to a non-existent capturing |
| 6272 |
subpattern. */ |
subpattern. */ |
| 6273 |
|
|