| 302 |
"(*VERB) not recognized\0" |
"(*VERB) not recognized\0" |
| 303 |
"number is too big\0" |
"number is too big\0" |
| 304 |
"subpattern name expected\0" |
"subpattern name expected\0" |
| 305 |
"digit expected after (?+"; |
"digit expected after (?+\0" |
| 306 |
|
"] is an invalid data character in JavaScript compatibility mode"; |
| 307 |
|
|
| 308 |
|
|
| 309 |
/* Table to identify digits and hex digits. This is used when compiling |
/* Table to identify digits and hex digits. This is used when compiling |
| 976 |
|
|
| 977 |
Arguments: |
Arguments: |
| 978 |
ptr current position in the pattern |
ptr current position in the pattern |
| 979 |
count current count of capturing parens so far encountered |
cd compile background data |
| 980 |
name name to seek, or NULL if seeking a numbered subpattern |
name name to seek, or NULL if seeking a numbered subpattern |
| 981 |
lorn name length, or subpattern number if name is NULL |
lorn name length, or subpattern number if name is NULL |
| 982 |
xmode TRUE if we are in /x mode |
xmode TRUE if we are in /x mode |
| 985 |
*/ |
*/ |
| 986 |
|
|
| 987 |
static int |
static int |
| 988 |
find_parens(const uschar *ptr, int count, const uschar *name, int lorn, |
find_parens(const uschar *ptr, compile_data *cd, const uschar *name, int lorn, |
| 989 |
BOOL xmode) |
BOOL xmode) |
| 990 |
{ |
{ |
| 991 |
const uschar *thisname; |
const uschar *thisname; |
| 992 |
|
int count = cd->bracount; |
| 993 |
|
|
| 994 |
for (; *ptr != 0; ptr++) |
for (; *ptr != 0; ptr++) |
| 995 |
{ |
{ |
| 1009 |
continue; |
continue; |
| 1010 |
} |
} |
| 1011 |
|
|
| 1012 |
/* Skip over character classes */ |
/* Skip over character classes; this logic must be similar to the way they |
| 1013 |
|
are handled for real. If the first character is '^', skip it. Also, if the |
| 1014 |
|
first few characters (either before or after ^) are \Q\E or \E we skip them |
| 1015 |
|
too. This makes for compatibility with Perl. */ |
| 1016 |
|
|
| 1017 |
if (*ptr == '[') |
if (*ptr == '[') |
| 1018 |
{ |
{ |
| 1019 |
|
BOOL negate_class = FALSE; |
| 1020 |
|
for (;;) |
| 1021 |
|
{ |
| 1022 |
|
int c = *(++ptr); |
| 1023 |
|
if (c == '\\') |
| 1024 |
|
{ |
| 1025 |
|
if (ptr[1] == 'E') ptr++; |
| 1026 |
|
else if (strncmp((const char *)ptr+1, "Q\\E", 3) == 0) ptr += 3; |
| 1027 |
|
else break; |
| 1028 |
|
} |
| 1029 |
|
else if (!negate_class && c == '^') |
| 1030 |
|
negate_class = TRUE; |
| 1031 |
|
else break; |
| 1032 |
|
} |
| 1033 |
|
|
| 1034 |
|
/* If the next character is ']', it is a data character that must be |
| 1035 |
|
skipped, except in JavaScript compatibility mode. */ |
| 1036 |
|
|
| 1037 |
|
if (ptr[1] == ']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) == 0) |
| 1038 |
|
ptr++; |
| 1039 |
|
|
| 1040 |
while (*(++ptr) != ']') |
while (*(++ptr) != ']') |
| 1041 |
{ |
{ |
| 1042 |
if (*ptr == 0) return -1; |
if (*ptr == 0) return -1; |
| 1301 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 1302 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 1303 |
case OP_ANY: |
case OP_ANY: |
| 1304 |
|
case OP_ALLANY: |
| 1305 |
branchlength++; |
branchlength++; |
| 1306 |
cc++; |
cc++; |
| 1307 |
break; |
break; |
| 1594 |
|
|
| 1595 |
/* Groups with zero repeats can of course be empty; skip them. */ |
/* Groups with zero repeats can of course be empty; skip them. */ |
| 1596 |
|
|
| 1597 |
if (c == OP_BRAZERO || c == OP_BRAMINZERO) |
if (c == OP_BRAZERO || c == OP_BRAMINZERO || c == OP_SKIPZERO) |
| 1598 |
{ |
{ |
| 1599 |
code += _pcre_OP_lengths[c]; |
code += _pcre_OP_lengths[c]; |
| 1600 |
do code += GET(code, 1); while (*code == OP_ALT); |
do code += GET(code, 1); while (*code == OP_ALT); |
| 1680 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 1681 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 1682 |
case OP_ANY: |
case OP_ANY: |
| 1683 |
|
case OP_ALLANY: |
| 1684 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 1685 |
case OP_CHAR: |
case OP_CHAR: |
| 1686 |
case OP_CHARNC: |
case OP_CHARNC: |
| 1875 |
that is referenced. This means that groups can be replicated for fixed |
that is referenced. This means that groups can be replicated for fixed |
| 1876 |
repetition simply by copying (because the recursion is allowed to refer to |
repetition simply by copying (because the recursion is allowed to refer to |
| 1877 |
earlier groups that are outside the current group). However, when a group is |
earlier groups that are outside the current group). However, when a group is |
| 1878 |
optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before |
optional (i.e. the minimum quantifier is zero), OP_BRAZERO or OP_SKIPZERO is |
| 1879 |
it, after it has been compiled. This means that any OP_RECURSE items within it |
inserted before it, after it has been compiled. This means that any OP_RECURSE |
| 1880 |
that refer to the group itself or any contained groups have to have their |
items within it that refer to the group itself or any contained groups have to |
| 1881 |
offsets adjusted. That is one of the jobs of this function. Before it is called, |
have their offsets adjusted. That is one of the jobs of this function. Before it |
| 1882 |
the partially compiled regex must be temporarily terminated with OP_END. |
is called, the partially compiled regex must be temporarily terminated with |
| 1883 |
|
OP_END. |
| 1884 |
|
|
| 1885 |
This function has been extended with the possibility of forward references for |
This function has been extended with the possibility of forward references for |
| 1886 |
recursions and subroutine calls. It must also check the list of such references |
recursions and subroutine calls. It must also check the list of such references |
| 1911 |
|
|
| 1912 |
/* See if this recursion is on the forward reference list. If so, adjust the |
/* See if this recursion is on the forward reference list. If so, adjust the |
| 1913 |
reference. */ |
reference. */ |
| 1914 |
|
|
| 1915 |
for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE) |
for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE) |
| 1916 |
{ |
{ |
| 1917 |
offset = GET(hc, 0); |
offset = GET(hc, 0); |
| 2487 |
/* Get next byte in the pattern */ |
/* Get next byte in the pattern */ |
| 2488 |
|
|
| 2489 |
c = *ptr; |
c = *ptr; |
| 2490 |
|
|
| 2491 |
/* If we are in the pre-compile phase, accumulate the length used for the |
/* If we are in the pre-compile phase, accumulate the length used for the |
| 2492 |
previous cycle of this loop. */ |
previous cycle of this loop. */ |
| 2493 |
|
|
| 2667 |
zerofirstbyte = firstbyte; |
zerofirstbyte = firstbyte; |
| 2668 |
zeroreqbyte = reqbyte; |
zeroreqbyte = reqbyte; |
| 2669 |
previous = code; |
previous = code; |
| 2670 |
*code++ = OP_ANY; |
*code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY; |
| 2671 |
break; |
break; |
| 2672 |
|
|
| 2673 |
|
|
| 2682 |
opcode is compiled. It may optionally have a bit map for characters < 256, |
opcode is compiled. It may optionally have a bit map for characters < 256, |
| 2683 |
but those above are explicitly listed afterwards. A flag byte tells |
but those above are explicitly listed afterwards. A flag byte tells |
| 2684 |
whether the bitmap is present, and whether this is a negated class or not. |
whether the bitmap is present, and whether this is a negated class or not. |
| 2685 |
*/ |
|
| 2686 |
|
In JavaScript compatibility mode, an isolated ']' causes an error. In |
| 2687 |
|
default (Perl) mode, it is treated as a data character. */ |
| 2688 |
|
|
| 2689 |
|
case ']': |
| 2690 |
|
if ((cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
| 2691 |
|
{ |
| 2692 |
|
*errorcodeptr = ERR64; |
| 2693 |
|
goto FAILED; |
| 2694 |
|
} |
| 2695 |
|
goto NORMAL_CHAR; |
| 2696 |
|
|
| 2697 |
case '[': |
case '[': |
| 2698 |
previous = code; |
previous = code; |
| 2725 |
negate_class = TRUE; |
negate_class = TRUE; |
| 2726 |
else break; |
else break; |
| 2727 |
} |
} |
| 2728 |
|
|
| 2729 |
|
/* Empty classes are allowed in JavaScript compatibility mode. Otherwise, |
| 2730 |
|
an initial ']' is taken as a data character -- the code below handles |
| 2731 |
|
that. In JS mode, [] must always fail, so generate OP_FAIL, whereas |
| 2732 |
|
[^] must match any character, so generate OP_ALLANY. */ |
| 2733 |
|
|
| 2734 |
|
if (c ==']' && (cd->external_options & PCRE_JAVASCRIPT_COMPAT) != 0) |
| 2735 |
|
{ |
| 2736 |
|
*code++ = negate_class? OP_ALLANY : OP_FAIL; |
| 2737 |
|
if (firstbyte == REQ_UNSET) firstbyte = REQ_NONE; |
| 2738 |
|
zerofirstbyte = firstbyte; |
| 2739 |
|
break; |
| 2740 |
|
} |
| 2741 |
|
|
| 2742 |
/* If a class contains a negative special such as \S, we need to flip the |
/* If a class contains a negative special such as \S, we need to flip the |
| 2743 |
negation flag at the end, so that support for characters > 255 works |
negation flag at the end, so that support for characters > 255 works |
| 3894 |
|
|
| 3895 |
if (repeat_min == 0) |
if (repeat_min == 0) |
| 3896 |
{ |
{ |
| 3897 |
/* If the maximum is also zero, we just omit the group from the output |
/* If the maximum is also zero, we used to just omit the group from the |
| 3898 |
altogether. */ |
output altogether, like this: |
|
|
|
|
if (repeat_max == 0) |
|
|
{ |
|
|
code = previous; |
|
|
goto END_REPEAT; |
|
|
} |
|
| 3899 |
|
|
| 3900 |
/* If the maximum is 1 or unlimited, we just have to stick in the |
** if (repeat_max == 0) |
| 3901 |
BRAZERO and do no more at this point. However, we do need to adjust |
** { |
| 3902 |
any OP_RECURSE calls inside the group that refer to the group itself or |
** code = previous; |
| 3903 |
any internal or forward referenced group, because the offset is from |
** goto END_REPEAT; |
| 3904 |
the start of the whole regex. Temporarily terminate the pattern while |
** } |
| 3905 |
doing this. */ |
|
| 3906 |
|
However, that fails when a group is referenced as a subroutine from |
| 3907 |
|
elsewhere in the pattern, so now we stick in OP_SKIPZERO in front of it |
| 3908 |
|
so that it is skipped on execution. As we don't have a list of which |
| 3909 |
|
groups are referenced, we cannot do this selectively. |
| 3910 |
|
|
| 3911 |
|
If the maximum is 1 or unlimited, we just have to stick in the BRAZERO |
| 3912 |
|
and do no more at this point. However, we do need to adjust any |
| 3913 |
|
OP_RECURSE calls inside the group that refer to the group itself or any |
| 3914 |
|
internal or forward referenced group, because the offset is from the |
| 3915 |
|
start of the whole regex. Temporarily terminate the pattern while doing |
| 3916 |
|
this. */ |
| 3917 |
|
|
| 3918 |
if (repeat_max <= 1) |
if (repeat_max <= 1) /* Covers 0, 1, and unlimited */ |
| 3919 |
{ |
{ |
| 3920 |
*code = OP_END; |
*code = OP_END; |
| 3921 |
adjust_recurse(previous, 1, utf8, cd, save_hwm); |
adjust_recurse(previous, 1, utf8, cd, save_hwm); |
| 3922 |
memmove(previous+1, previous, len); |
memmove(previous+1, previous, len); |
| 3923 |
code++; |
code++; |
| 3924 |
|
if (repeat_max == 0) |
| 3925 |
|
{ |
| 3926 |
|
*previous++ = OP_SKIPZERO; |
| 3927 |
|
goto END_REPEAT; |
| 3928 |
|
} |
| 3929 |
*previous++ = OP_BRAZERO + repeat_type; |
*previous++ = OP_BRAZERO + repeat_type; |
| 3930 |
} |
} |
| 3931 |
|
|
| 4119 |
} |
} |
| 4120 |
} |
} |
| 4121 |
} |
} |
| 4122 |
|
|
| 4123 |
|
/* If previous is OP_FAIL, it was generated by an empty class [] in |
| 4124 |
|
JavaScript mode. The other ways in which OP_FAIL can be generated, that is |
| 4125 |
|
by (*FAIL) or (?!) set previous to NULL, which gives a "nothing to repeat" |
| 4126 |
|
error above. We can just ignore the repeat in JS case. */ |
| 4127 |
|
|
| 4128 |
|
else if (*previous == OP_FAIL) goto END_REPEAT; |
| 4129 |
|
|
| 4130 |
/* Else there's some kind of shambles */ |
/* Else there's some kind of shambles */ |
| 4131 |
|
|
| 4207 |
bravalue = OP_CBRA; |
bravalue = OP_CBRA; |
| 4208 |
save_hwm = cd->hwm; |
save_hwm = cd->hwm; |
| 4209 |
reset_bracount = FALSE; |
reset_bracount = FALSE; |
| 4210 |
|
|
| 4211 |
/* First deal with various "verbs" that can be introduced by '*'. */ |
/* First deal with various "verbs" that can be introduced by '*'. */ |
| 4212 |
|
|
| 4213 |
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) |
if (*(++ptr) == '*' && (cd->ctypes[ptr[1]] & ctype_letter) != 0) |
| 4413 |
|
|
| 4414 |
/* Search the pattern for a forward reference */ |
/* Search the pattern for a forward reference */ |
| 4415 |
|
|
| 4416 |
else if ((i = find_parens(ptr, cd->bracount, name, namelen, |
else if ((i = find_parens(ptr, cd, name, namelen, |
| 4417 |
(options & PCRE_EXTENDED) != 0)) > 0) |
(options & PCRE_EXTENDED) != 0)) > 0) |
| 4418 |
{ |
{ |
| 4419 |
PUT2(code, 2+LINK_SIZE, i); |
PUT2(code, 2+LINK_SIZE, i); |
| 4710 |
recno = GET2(slot, 0); |
recno = GET2(slot, 0); |
| 4711 |
} |
} |
| 4712 |
else if ((recno = /* Forward back reference */ |
else if ((recno = /* Forward back reference */ |
| 4713 |
find_parens(ptr, cd->bracount, name, namelen, |
find_parens(ptr, cd, name, namelen, |
| 4714 |
(options & PCRE_EXTENDED) != 0)) <= 0) |
(options & PCRE_EXTENDED) != 0)) <= 0) |
| 4715 |
{ |
{ |
| 4716 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
| 4820 |
|
|
| 4821 |
if (called == NULL) |
if (called == NULL) |
| 4822 |
{ |
{ |
| 4823 |
if (find_parens(ptr, cd->bracount, NULL, recno, |
if (find_parens(ptr, cd, NULL, recno, |
| 4824 |
(options & PCRE_EXTENDED) != 0) < 0) |
(options & PCRE_EXTENDED) != 0) < 0) |
| 4825 |
{ |
{ |
| 4826 |
*errorcodeptr = ERR15; |
*errorcodeptr = ERR15; |
| 4827 |
goto FAILED; |
goto FAILED; |
| 5196 |
-ESC_g is returned only for these cases. So we don't need to check for < |
-ESC_g is returned only for these cases. So we don't need to check for < |
| 5197 |
or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is |
or ' if the value is -ESC_g. For the Perl syntax \g{n} the value is |
| 5198 |
-ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as |
-ESC_REF+n, and for the Perl syntax \g{name} the result is -ESC_k (as |
| 5199 |
that is a synonym). */ |
that is a synonym for a named back reference). */ |
| 5200 |
|
|
| 5201 |
if (-c == ESC_g) |
if (-c == ESC_g) |
| 5202 |
{ |
{ |
| 5203 |
const uschar *p; |
const uschar *p; |
| 5204 |
|
save_hwm = cd->hwm; /* Normally this is set when '(' is read */ |
| 5205 |
terminator = (*(++ptr) == '<')? '>' : '\''; |
terminator = (*(++ptr) == '<')? '>' : '\''; |
| 5206 |
|
|
| 5207 |
/* These two statements stop the compiler from warning about possibly |
/* These two statements stop the compiler from warning about possibly |
| 5755 |
if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE; |
if (!is_anchored(scode, options, bracket_map, backref_map)) return FALSE; |
| 5756 |
} |
} |
| 5757 |
|
|
| 5758 |
/* .* is not anchored unless DOTALL is set and it isn't in brackets that |
/* .* is not anchored unless DOTALL is set (which generates OP_ALLANY) and |
| 5759 |
are or may be referenced. */ |
it isn't in brackets that are or may be referenced. */ |
| 5760 |
|
|
| 5761 |
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || |
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR || |
| 5762 |
op == OP_TYPEPOSSTAR) && |
op == OP_TYPEPOSSTAR)) |
|
(*options & PCRE_DOTALL) != 0) |
|
| 5763 |
{ |
{ |
| 5764 |
if (scode[1] != OP_ANY || (bracket_map & backref_map) != 0) return FALSE; |
if (scode[1] != OP_ALLANY || (bracket_map & backref_map) != 0) |
| 5765 |
|
return FALSE; |
| 5766 |
} |
} |
| 5767 |
|
|
| 5768 |
/* Check for explicit anchoring */ |
/* Check for explicit anchoring */ |
| 6267 |
if (groupptr == NULL) errorcode = ERR53; |
if (groupptr == NULL) errorcode = ERR53; |
| 6268 |
else PUT(((uschar *)codestart), offset, groupptr - codestart); |
else PUT(((uschar *)codestart), offset, groupptr - codestart); |
| 6269 |
} |
} |
| 6270 |
|
|
| 6271 |
/* Give an error if there's back reference to a non-existent capturing |
/* Give an error if there's back reference to a non-existent capturing |
| 6272 |
subpattern. */ |
subpattern. */ |
| 6273 |
|
|