| 9 |
|
|
| 10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
| 11 |
|
|
| 12 |
Copyright (c) 1997-2000 University of Cambridge |
Copyright (c) 1997-2001 University of Cambridge |
| 13 |
|
|
| 14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
| 60 |
#endif |
#endif |
| 61 |
|
|
| 62 |
|
|
| 63 |
/* Number of items on the nested bracket stacks at compile time. This should |
/* Maximum number of items on the nested bracket stacks at compile time. This |
| 64 |
not be set greater than 200. */ |
applies to the nesting of all kinds of parentheses. It does not limit |
| 65 |
|
un-nested, non-capturing parentheses. This number can be made bigger if |
| 66 |
|
necessary - it is used to dimension one int and one unsigned char vector at |
| 67 |
|
compile time. */ |
| 68 |
|
|
| 69 |
#define BRASTACK_SIZE 200 |
#define BRASTACK_SIZE 200 |
| 70 |
|
|
| 98 |
"class", "Ref", "Recurse", |
"class", "Ref", "Recurse", |
| 99 |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", |
| 100 |
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", |
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", |
| 101 |
"Brazero", "Braminzero", "Bra" |
"Brazero", "Braminzero", "Branumber", "Bra" |
| 102 |
}; |
}; |
| 103 |
#endif |
#endif |
| 104 |
|
|
| 114 |
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ |
0, 0, 0, 0, 0, 0, 0, 0, /* H - O */ |
| 115 |
0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */ |
0, 0, 0, -ESC_S, 0, 0, 0, -ESC_W, /* P - W */ |
| 116 |
0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */ |
0, 0, -ESC_Z, '[', '\\', ']', '^', '_', /* X - _ */ |
| 117 |
'`', 7, -ESC_b, 0, -ESC_d, 27, '\f', 0, /* ` - g */ |
'`', 7, -ESC_b, 0, -ESC_d, ESC_E, ESC_F, 0, /* ` - g */ |
| 118 |
0, 0, 0, 0, 0, 0, '\n', 0, /* h - o */ |
0, 0, 0, 0, 0, 0, ESC_N, 0, /* h - o */ |
| 119 |
0, 0, '\r', -ESC_s, '\t', 0, 0, -ESC_w, /* p - w */ |
0, 0, ESC_R, -ESC_s, ESC_T, 0, 0, -ESC_w, /* p - w */ |
| 120 |
0, 0, -ESC_z /* x - z */ |
0, 0, -ESC_z /* x - z */ |
| 121 |
}; |
}; |
| 122 |
|
|
| 817 |
/* Skip over things that don't match chars */ |
/* Skip over things that don't match chars */ |
| 818 |
|
|
| 819 |
case OP_REVERSE: |
case OP_REVERSE: |
| 820 |
|
case OP_BRANUMBER: |
| 821 |
|
case OP_CREF: |
| 822 |
cc++; |
cc++; |
| 823 |
/* Fall through */ |
/* Fall through */ |
| 824 |
|
|
|
case OP_CREF: |
|
| 825 |
case OP_OPT: |
case OP_OPT: |
| 826 |
cc++; |
cc++; |
| 827 |
/* Fall through */ |
/* Fall through */ |
| 875 |
/* Check a class for variable quantification */ |
/* Check a class for variable quantification */ |
| 876 |
|
|
| 877 |
case OP_CLASS: |
case OP_CLASS: |
| 878 |
cc += (*cc == OP_REF)? 2 : 33; |
cc += 33; |
| 879 |
|
|
| 880 |
switch (*cc) |
switch (*cc) |
| 881 |
{ |
{ |
| 982 |
|
|
| 983 |
Arguments: |
Arguments: |
| 984 |
options the option bits |
options the option bits |
| 985 |
brackets points to number of brackets used |
brackets points to number of extracting brackets used |
| 986 |
code points to the pointer to the current code point |
code points to the pointer to the current code point |
| 987 |
ptrptr points to the current pattern pointer |
ptrptr points to the current pattern pointer |
| 988 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
| 1033 |
int class_charcount; |
int class_charcount; |
| 1034 |
int class_lastchar; |
int class_lastchar; |
| 1035 |
int newoptions; |
int newoptions; |
| 1036 |
int condref; |
int skipbytes; |
| 1037 |
int subreqchar; |
int subreqchar; |
| 1038 |
|
|
| 1039 |
c = *ptr; |
c = *ptr; |
| 1044 |
{ |
{ |
| 1045 |
/* The space before the ; is to avoid a warning on a silly compiler |
/* The space before the ; is to avoid a warning on a silly compiler |
| 1046 |
on the Macintosh. */ |
on the Macintosh. */ |
| 1047 |
while ((c = *(++ptr)) != 0 && c != '\n') ; |
while ((c = *(++ptr)) != 0 && c != NEWLINE) ; |
| 1048 |
continue; |
continue; |
| 1049 |
} |
} |
| 1050 |
} |
} |
| 1582 |
OP_BRAZERO in front of it, and because the group appears once in the |
OP_BRAZERO in front of it, and because the group appears once in the |
| 1583 |
data, whereas in other cases it appears the minimum number of times. For |
data, whereas in other cases it appears the minimum number of times. For |
| 1584 |
this reason, it is simplest to treat this case separately, as otherwise |
this reason, it is simplest to treat this case separately, as otherwise |
| 1585 |
the code gets far too mess. There are several special subcases when the |
the code gets far too messy. There are several special subcases when the |
| 1586 |
minimum is zero. */ |
minimum is zero. */ |
| 1587 |
|
|
| 1588 |
if (repeat_min == 0) |
if (repeat_min == 0) |
| 1733 |
|
|
| 1734 |
case '(': |
case '(': |
| 1735 |
newoptions = options; |
newoptions = options; |
| 1736 |
condref = -1; |
skipbytes = 0; |
| 1737 |
|
|
| 1738 |
if (*(++ptr) == '?') |
if (*(++ptr) == '?') |
| 1739 |
{ |
{ |
| 1756 |
bravalue = OP_COND; /* Conditional group */ |
bravalue = OP_COND; /* Conditional group */ |
| 1757 |
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) |
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) |
| 1758 |
{ |
{ |
| 1759 |
condref = *ptr - '0'; |
int condref = *ptr - '0'; |
| 1760 |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
| 1761 |
if (condref == 0) |
if (condref == 0) |
| 1762 |
{ |
{ |
| 1764 |
goto FAILED; |
goto FAILED; |
| 1765 |
} |
} |
| 1766 |
ptr++; |
ptr++; |
| 1767 |
|
code[3] = OP_CREF; |
| 1768 |
|
code[4] = condref >> 8; |
| 1769 |
|
code[5] = condref & 255; |
| 1770 |
|
skipbytes = 3; |
| 1771 |
} |
} |
| 1772 |
else ptr--; |
else ptr--; |
| 1773 |
break; |
break; |
| 1870 |
} |
} |
| 1871 |
} |
} |
| 1872 |
|
|
| 1873 |
/* Else we have a referencing group; adjust the opcode. */ |
/* Else we have a referencing group; adjust the opcode. If the bracket |
| 1874 |
|
number is greater than EXTRACT_BASIC_MAX, we set the opcode one higher, and |
| 1875 |
|
arrange for the true number to follow later, in an OP_BRANUMBER item. */ |
| 1876 |
|
|
| 1877 |
else |
else |
| 1878 |
{ |
{ |
| 1879 |
if (++(*brackets) > EXTRACT_MAX) |
if (++(*brackets) > EXTRACT_BASIC_MAX) |
| 1880 |
{ |
{ |
| 1881 |
*errorptr = ERR13; |
bravalue = OP_BRA + EXTRACT_BASIC_MAX + 1; |
| 1882 |
goto FAILED; |
code[3] = OP_BRANUMBER; |
| 1883 |
|
code[4] = *brackets >> 8; |
| 1884 |
|
code[5] = *brackets & 255; |
| 1885 |
|
skipbytes = 3; |
| 1886 |
} |
} |
| 1887 |
bravalue = OP_BRA + *brackets; |
else bravalue = OP_BRA + *brackets; |
| 1888 |
} |
} |
| 1889 |
|
|
| 1890 |
/* Process nested bracketed re. Assertions may not be repeated, but other |
/* Process nested bracketed re. Assertions may not be repeated, but other |
| 1900 |
options | PCRE_INGROUP, /* Set for all nested groups */ |
options | PCRE_INGROUP, /* Set for all nested groups */ |
| 1901 |
((options & PCRE_IMS) != (newoptions & PCRE_IMS))? |
((options & PCRE_IMS) != (newoptions & PCRE_IMS))? |
| 1902 |
newoptions & PCRE_IMS : -1, /* Pass ims options if changed */ |
newoptions & PCRE_IMS : -1, /* Pass ims options if changed */ |
| 1903 |
brackets, /* Bracket level */ |
brackets, /* Extracting bracket count */ |
| 1904 |
&tempcode, /* Where to put code (updated) */ |
&tempcode, /* Where to put code (updated) */ |
| 1905 |
&ptr, /* Input pointer (updated) */ |
&ptr, /* Input pointer (updated) */ |
| 1906 |
errorptr, /* Where to put an error message */ |
errorptr, /* Where to put an error message */ |
| 1907 |
(bravalue == OP_ASSERTBACK || |
(bravalue == OP_ASSERTBACK || |
| 1908 |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
| 1909 |
condref, /* Condition reference number */ |
skipbytes, /* Skip over OP_COND/OP_BRANUMBER */ |
| 1910 |
&subreqchar, /* For possible last char */ |
&subreqchar, /* For possible last char */ |
| 1911 |
&subcountlits, /* For literal count */ |
&subcountlits, /* For literal count */ |
| 1912 |
cd)) /* Tables block */ |
cd)) /* Tables block */ |
| 1920 |
/* If this is a conditional bracket, check that there are no more than |
/* If this is a conditional bracket, check that there are no more than |
| 1921 |
two branches in the group. */ |
two branches in the group. */ |
| 1922 |
|
|
| 1923 |
if (bravalue == OP_COND) |
else if (bravalue == OP_COND) |
| 1924 |
{ |
{ |
| 1925 |
uschar *tc = code; |
uschar *tc = code; |
| 1926 |
condcount = 0; |
condcount = 0; |
| 1987 |
{ |
{ |
| 1988 |
if (-c >= ESC_REF) |
if (-c >= ESC_REF) |
| 1989 |
{ |
{ |
| 1990 |
|
int number = -c - ESC_REF; |
| 1991 |
previous = code; |
previous = code; |
| 1992 |
*code++ = OP_REF; |
*code++ = OP_REF; |
| 1993 |
*code++ = -c - ESC_REF; |
*code++ = number >> 8; |
| 1994 |
|
*code++ = number & 255; |
| 1995 |
} |
} |
| 1996 |
else |
else |
| 1997 |
{ |
{ |
| 2026 |
{ |
{ |
| 2027 |
/* The space before the ; is to avoid a warning on a silly compiler |
/* The space before the ; is to avoid a warning on a silly compiler |
| 2028 |
on the Macintosh. */ |
on the Macintosh. */ |
| 2029 |
while ((c = *(++ptr)) != 0 && c != '\n') ; |
while ((c = *(++ptr)) != 0 && c != NEWLINE) ; |
| 2030 |
if (c == 0) break; |
if (c == 0) break; |
| 2031 |
continue; |
continue; |
| 2032 |
} |
} |
| 2115 |
ptrptr -> the address of the current pattern pointer |
ptrptr -> the address of the current pattern pointer |
| 2116 |
errorptr -> pointer to error message |
errorptr -> pointer to error message |
| 2117 |
lookbehind TRUE if this is a lookbehind assertion |
lookbehind TRUE if this is a lookbehind assertion |
| 2118 |
condref >= 0 for OPT_CREF setting at start of conditional group |
skipbytes skip this many bytes at start (for OP_COND, OP_BRANUMBER) |
| 2119 |
reqchar -> place to put the last required character, or a negative number |
reqchar -> place to put the last required character, or a negative number |
| 2120 |
countlits -> place to put the shortest literal count of any branch |
countlits -> place to put the shortest literal count of any branch |
| 2121 |
cd points to the data block with tables pointers |
cd points to the data block with tables pointers |
| 2125 |
|
|
| 2126 |
static BOOL |
static BOOL |
| 2127 |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
| 2128 |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int skipbytes, |
| 2129 |
int *reqchar, int *countlits, compile_data *cd) |
int *reqchar, int *countlits, compile_data *cd) |
| 2130 |
{ |
{ |
| 2131 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
| 2138 |
|
|
| 2139 |
*reqchar = -1; |
*reqchar = -1; |
| 2140 |
*countlits = INT_MAX; |
*countlits = INT_MAX; |
| 2141 |
code += 3; |
code += 3 + skipbytes; |
|
|
|
|
/* At the start of a reference-based conditional group, insert the reference |
|
|
number as an OP_CREF item. */ |
|
|
|
|
|
if (condref >= 0) |
|
|
{ |
|
|
*code++ = OP_CREF; |
|
|
*code++ = condref; |
|
|
} |
|
| 2142 |
|
|
| 2143 |
/* Loop for each alternative branch */ |
/* Loop for each alternative branch */ |
| 2144 |
|
|
| 2290 |
break; |
break; |
| 2291 |
|
|
| 2292 |
case OP_CREF: |
case OP_CREF: |
| 2293 |
code += 2; |
case OP_BRANUMBER: |
| 2294 |
|
code += 3; |
| 2295 |
break; |
break; |
| 2296 |
|
|
| 2297 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
| 2554 |
{ |
{ |
| 2555 |
int min, max; |
int min, max; |
| 2556 |
int class_charcount; |
int class_charcount; |
| 2557 |
|
int bracket_length; |
| 2558 |
|
|
| 2559 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
| 2560 |
{ |
{ |
| 2563 |
{ |
{ |
| 2564 |
/* The space before the ; is to avoid a warning on a silly compiler |
/* The space before the ; is to avoid a warning on a silly compiler |
| 2565 |
on the Macintosh. */ |
on the Macintosh. */ |
| 2566 |
while ((c = *(++ptr)) != 0 && c != '\n') ; |
while ((c = *(++ptr)) != 0 && c != NEWLINE) ; |
| 2567 |
continue; |
continue; |
| 2568 |
} |
} |
| 2569 |
} |
} |
| 2589 |
} |
} |
| 2590 |
length++; |
length++; |
| 2591 |
|
|
| 2592 |
/* A back reference needs an additional char, plus either one or 5 |
/* A back reference needs an additional 2 bytes, plus either one or 5 |
| 2593 |
bytes for a repeat. We also need to keep the value of the highest |
bytes for a repeat. We also need to keep the value of the highest |
| 2594 |
back reference. */ |
back reference. */ |
| 2595 |
|
|
| 2597 |
{ |
{ |
| 2598 |
int refnum = -c - ESC_REF; |
int refnum = -c - ESC_REF; |
| 2599 |
if (refnum > top_backref) top_backref = refnum; |
if (refnum > top_backref) top_backref = refnum; |
| 2600 |
length++; /* For single back reference */ |
length += 2; /* For single back reference */ |
| 2601 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
| 2602 |
{ |
{ |
| 2603 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
| 2695 |
|
|
| 2696 |
case '(': |
case '(': |
| 2697 |
branch_newextra = 0; |
branch_newextra = 0; |
| 2698 |
|
bracket_length = 3; |
| 2699 |
|
|
| 2700 |
/* Handle special forms of bracket, which all start (? */ |
/* Handle special forms of bracket, which all start (? */ |
| 2701 |
|
|
| 2763 |
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) |
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) |
| 2764 |
{ |
{ |
| 2765 |
ptr += 4; |
ptr += 4; |
| 2766 |
length += 2; |
length += 3; |
| 2767 |
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; |
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; |
| 2768 |
if (*ptr != ')') |
if (*ptr != ')') |
| 2769 |
{ |
{ |
| 2890 |
} |
} |
| 2891 |
|
|
| 2892 |
/* Extracting brackets must be counted so we can process escapes in a |
/* Extracting brackets must be counted so we can process escapes in a |
| 2893 |
Perlish way. */ |
Perlish way. If the number exceeds EXTRACT_BASIC_MAX we are going to |
| 2894 |
|
need an additional 3 bytes of store per extracting bracket. */ |
| 2895 |
|
|
| 2896 |
else bracount++; |
else |
| 2897 |
|
{ |
| 2898 |
|
bracount++; |
| 2899 |
|
if (bracount > EXTRACT_BASIC_MAX) bracket_length += 3; |
| 2900 |
|
} |
| 2901 |
|
|
| 2902 |
/* Non-special forms of bracket. Save length for computing whole length |
/* Save length for computing whole length at end if there's a repeat that |
| 2903 |
at end if there's a repeat that requires duplication of the group. Also |
requires duplication of the group. Also save the current value of |
| 2904 |
save the current value of branch_extra, and start the new group with |
branch_extra, and start the new group with the new value. If non-zero, this |
| 2905 |
the new value. If non-zero, this will either be 2 for a (?imsx: group, or 3 |
will either be 2 for a (?imsx: group, or 3 for a lookbehind assertion. */ |
|
for a lookbehind assertion. */ |
|
| 2906 |
|
|
| 2907 |
if (brastackptr >= sizeof(brastack)/sizeof(int)) |
if (brastackptr >= sizeof(brastack)/sizeof(int)) |
| 2908 |
{ |
{ |
| 2914 |
branch_extra = branch_newextra; |
branch_extra = branch_newextra; |
| 2915 |
|
|
| 2916 |
brastack[brastackptr++] = length; |
brastack[brastackptr++] = length; |
| 2917 |
length += 3; |
length += bracket_length; |
| 2918 |
continue; |
continue; |
| 2919 |
|
|
| 2920 |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
| 2994 |
{ |
{ |
| 2995 |
/* The space before the ; is to avoid a warning on a silly compiler |
/* The space before the ; is to avoid a warning on a silly compiler |
| 2996 |
on the Macintosh. */ |
on the Macintosh. */ |
| 2997 |
while ((c = *(++ptr)) != 0 && c != '\n') ; |
while ((c = *(++ptr)) != 0 && c != NEWLINE) ; |
| 2998 |
continue; |
continue; |
| 2999 |
} |
} |
| 3000 |
} |
} |
| 3075 |
code = re->code; |
code = re->code; |
| 3076 |
*code = OP_BRA; |
*code = OP_BRA; |
| 3077 |
bracount = 0; |
bracount = 0; |
| 3078 |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, 0, |
| 3079 |
&reqchar, &countlits, &compile_block); |
&reqchar, &countlits, &compile_block); |
| 3080 |
re->top_bracket = bracount; |
re->top_bracket = bracount; |
| 3081 |
re->top_backref = top_backref; |
re->top_backref = top_backref; |
| 3189 |
|
|
| 3190 |
if (*code >= OP_BRA) |
if (*code >= OP_BRA) |
| 3191 |
{ |
{ |
| 3192 |
printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); |
if (*code - OP_BRA > EXTRACT_BASIC_MAX) |
| 3193 |
|
printf("%3d Bra extra", (code[1] << 8) + code[2]); |
| 3194 |
|
else |
| 3195 |
|
printf("%3d Bra %d", (code[1] << 8) + code[2], *code - OP_BRA); |
| 3196 |
code += 2; |
code += 2; |
| 3197 |
} |
} |
| 3198 |
|
|
| 3203 |
code++; |
code++; |
| 3204 |
break; |
break; |
| 3205 |
|
|
|
case OP_COND: |
|
|
printf("%3d Cond", (code[1] << 8) + code[2]); |
|
|
code += 2; |
|
|
break; |
|
|
|
|
|
case OP_CREF: |
|
|
printf(" %.2d %s", code[1], OP_names[*code]); |
|
|
code++; |
|
|
break; |
|
|
|
|
| 3206 |
case OP_CHARS: |
case OP_CHARS: |
| 3207 |
charlength = *(++code); |
charlength = *(++code); |
| 3208 |
printf("%3d ", charlength); |
printf("%3d ", charlength); |
| 3219 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
| 3220 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
| 3221 |
case OP_ONCE: |
case OP_ONCE: |
|
printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); |
|
|
code += 2; |
|
|
break; |
|
|
|
|
| 3222 |
case OP_REVERSE: |
case OP_REVERSE: |
| 3223 |
|
case OP_BRANUMBER: |
| 3224 |
|
case OP_COND: |
| 3225 |
|
case OP_CREF: |
| 3226 |
printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); |
printf("%3d %s", (code[1] << 8) + code[2], OP_names[*code]); |
| 3227 |
code += 2; |
code += 2; |
| 3228 |
break; |
break; |
| 3295 |
break; |
break; |
| 3296 |
|
|
| 3297 |
case OP_REF: |
case OP_REF: |
| 3298 |
printf(" \\%d", *(++code)); |
printf(" \\%d", (code[1] << 8) | code[2]); |
| 3299 |
code ++; |
code += 3; |
| 3300 |
goto CLASS_REF_REPEAT; |
goto CLASS_REF_REPEAT; |
| 3301 |
|
|
| 3302 |
case OP_CLASS: |
case OP_CLASS: |
| 3509 |
|
|
| 3510 |
if (op > OP_BRA) |
if (op > OP_BRA) |
| 3511 |
{ |
{ |
| 3512 |
|
int offset; |
| 3513 |
int number = op - OP_BRA; |
int number = op - OP_BRA; |
| 3514 |
int offset = number << 1; |
|
| 3515 |
|
/* For extended extraction brackets (large number), we have to fish out the |
| 3516 |
|
number from a dummy opcode at the start. */ |
| 3517 |
|
|
| 3518 |
|
if (number > EXTRACT_BASIC_MAX) number = (ecode[4] << 8) | ecode[5]; |
| 3519 |
|
offset = number << 1; |
| 3520 |
|
|
| 3521 |
#ifdef DEBUG |
#ifdef DEBUG |
| 3522 |
printf("start bracket %d subject=", number); |
printf("start bracket %d subject=", number); |
| 3546 |
md->offset_vector[offset] = save_offset1; |
md->offset_vector[offset] = save_offset1; |
| 3547 |
md->offset_vector[offset+1] = save_offset2; |
md->offset_vector[offset+1] = save_offset2; |
| 3548 |
md->offset_vector[md->offset_end - number] = save_offset3; |
md->offset_vector[md->offset_end - number] = save_offset3; |
| 3549 |
|
|
| 3550 |
return FALSE; |
return FALSE; |
| 3551 |
} |
} |
| 3552 |
|
|
| 3579 |
case OP_COND: |
case OP_COND: |
| 3580 |
if (ecode[3] == OP_CREF) /* Condition is extraction test */ |
if (ecode[3] == OP_CREF) /* Condition is extraction test */ |
| 3581 |
{ |
{ |
| 3582 |
int offset = ecode[4] << 1; /* Doubled reference number */ |
int offset = (ecode[4] << 9) | (ecode[5] << 1); /* Doubled ref number */ |
| 3583 |
return match(eptr, |
return match(eptr, |
| 3584 |
ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)? |
ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)? |
| 3585 |
5 : 3 + (ecode[1] << 8) + ecode[2]), |
6 : 3 + (ecode[1] << 8) + ecode[2]), |
| 3586 |
offset_top, md, ims, eptrb, match_isgroup); |
offset_top, md, ims, eptrb, match_isgroup); |
| 3587 |
} |
} |
| 3588 |
|
|
| 3602 |
} |
} |
| 3603 |
/* Control never reaches here */ |
/* Control never reaches here */ |
| 3604 |
|
|
| 3605 |
/* Skip over conditional reference data if encountered (should not be) */ |
/* Skip over conditional reference or large extraction number data if |
| 3606 |
|
encountered. */ |
| 3607 |
|
|
| 3608 |
case OP_CREF: |
case OP_CREF: |
| 3609 |
ecode += 2; |
case OP_BRANUMBER: |
| 3610 |
|
ecode += 3; |
| 3611 |
break; |
break; |
| 3612 |
|
|
| 3613 |
/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched |
/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched |
| 3873 |
|
|
| 3874 |
if (*prev != OP_COND) |
if (*prev != OP_COND) |
| 3875 |
{ |
{ |
| 3876 |
|
int offset; |
| 3877 |
int number = *prev - OP_BRA; |
int number = *prev - OP_BRA; |
| 3878 |
int offset = number << 1; |
|
| 3879 |
|
/* For extended extraction brackets (large number), we have to fish out |
| 3880 |
|
the number from a dummy opcode at the start. */ |
| 3881 |
|
|
| 3882 |
|
if (number > EXTRACT_BASIC_MAX) number = (prev[4] << 8) | prev[5]; |
| 3883 |
|
offset = number << 1; |
| 3884 |
|
|
| 3885 |
#ifdef DEBUG |
#ifdef DEBUG |
| 3886 |
printf("end bracket %d", number); |
printf("end bracket %d", number); |
| 3940 |
if (md->notbol && eptr == md->start_subject) return FALSE; |
if (md->notbol && eptr == md->start_subject) return FALSE; |
| 3941 |
if ((ims & PCRE_MULTILINE) != 0) |
if ((ims & PCRE_MULTILINE) != 0) |
| 3942 |
{ |
{ |
| 3943 |
if (eptr != md->start_subject && eptr[-1] != '\n') return FALSE; |
if (eptr != md->start_subject && eptr[-1] != NEWLINE) return FALSE; |
| 3944 |
ecode++; |
ecode++; |
| 3945 |
break; |
break; |
| 3946 |
} |
} |
| 3959 |
case OP_DOLL: |
case OP_DOLL: |
| 3960 |
if ((ims & PCRE_MULTILINE) != 0) |
if ((ims & PCRE_MULTILINE) != 0) |
| 3961 |
{ |
{ |
| 3962 |
if (eptr < md->end_subject) { if (*eptr != '\n') return FALSE; } |
if (eptr < md->end_subject) { if (*eptr != NEWLINE) return FALSE; } |
| 3963 |
else { if (md->noteol) return FALSE; } |
else { if (md->noteol) return FALSE; } |
| 3964 |
ecode++; |
ecode++; |
| 3965 |
break; |
break; |
| 3970 |
if (!md->endonly) |
if (!md->endonly) |
| 3971 |
{ |
{ |
| 3972 |
if (eptr < md->end_subject - 1 || |
if (eptr < md->end_subject - 1 || |
| 3973 |
(eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE; |
(eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE; |
| 3974 |
|
|
| 3975 |
ecode++; |
ecode++; |
| 3976 |
break; |
break; |
| 3989 |
|
|
| 3990 |
case OP_EODN: |
case OP_EODN: |
| 3991 |
if (eptr < md->end_subject - 1 || |
if (eptr < md->end_subject - 1 || |
| 3992 |
(eptr == md->end_subject - 1 && *eptr != '\n')) return FALSE; |
(eptr == md->end_subject - 1 && *eptr != NEWLINE)) return FALSE; |
| 3993 |
ecode++; |
ecode++; |
| 3994 |
break; |
break; |
| 3995 |
|
|
| 4011 |
/* Match a single character type; inline for speed */ |
/* Match a single character type; inline for speed */ |
| 4012 |
|
|
| 4013 |
case OP_ANY: |
case OP_ANY: |
| 4014 |
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n') |
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE) |
| 4015 |
return FALSE; |
return FALSE; |
| 4016 |
if (eptr++ >= md->end_subject) return FALSE; |
if (eptr++ >= md->end_subject) return FALSE; |
| 4017 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 4074 |
case OP_REF: |
case OP_REF: |
| 4075 |
{ |
{ |
| 4076 |
int length; |
int length; |
| 4077 |
int offset = ecode[1] << 1; /* Doubled reference number */ |
int offset = (ecode[1] << 9) | (ecode[2] << 1); /* Doubled ref number */ |
| 4078 |
ecode += 2; /* Advance past the item */ |
ecode += 3; /* Advance past item */ |
| 4079 |
|
|
| 4080 |
/* If the reference is unset, set the length to be longer than the amount |
/* If the reference is unset, set the length to be longer than the amount |
| 4081 |
of subject left; this ensures that every attempt at a match fails. We |
of subject left; this ensures that every attempt at a match fails. We |
| 4619 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 4620 |
{ |
{ |
| 4621 |
if (eptr >= md->end_subject || |
if (eptr >= md->end_subject || |
| 4622 |
(*eptr++ == '\n' && (ims & PCRE_DOTALL) == 0)) |
(*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0)) |
| 4623 |
return FALSE; |
return FALSE; |
| 4624 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 4625 |
} |
} |
| 4628 |
#endif |
#endif |
| 4629 |
/* Non-UTF8 can be faster */ |
/* Non-UTF8 can be faster */ |
| 4630 |
if ((ims & PCRE_DOTALL) == 0) |
if ((ims & PCRE_DOTALL) == 0) |
| 4631 |
{ for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; } |
{ for (i = 1; i <= min; i++) if (*eptr++ == NEWLINE) return FALSE; } |
| 4632 |
else eptr += min; |
else eptr += min; |
| 4633 |
break; |
break; |
| 4634 |
|
|
| 4683 |
switch(ctype) |
switch(ctype) |
| 4684 |
{ |
{ |
| 4685 |
case OP_ANY: |
case OP_ANY: |
| 4686 |
if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; |
if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) return FALSE; |
| 4687 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 4688 |
if (md->utf8) |
if (md->utf8) |
| 4689 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 4738 |
{ |
{ |
| 4739 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 4740 |
{ |
{ |
| 4741 |
if (eptr >= md->end_subject || *eptr++ == '\n') break; |
if (eptr >= md->end_subject || *eptr++ == NEWLINE) break; |
| 4742 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 4743 |
} |
} |
| 4744 |
} |
} |
| 4758 |
{ |
{ |
| 4759 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 4760 |
{ |
{ |
| 4761 |
if (eptr >= md->end_subject || *eptr == '\n') break; |
if (eptr >= md->end_subject || *eptr == NEWLINE) break; |
| 4762 |
eptr++; |
eptr++; |
| 4763 |
} |
} |
| 4764 |
} |
} |
| 4899 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
| 4900 |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
| 4901 |
BOOL using_temporary_offsets = FALSE; |
BOOL using_temporary_offsets = FALSE; |
| 4902 |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
BOOL anchored; |
| 4903 |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
BOOL startline; |
| 4904 |
|
|
| 4905 |
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; |
if ((options & ~PUBLIC_EXEC_OPTIONS) != 0) return PCRE_ERROR_BADOPTION; |
| 4906 |
|
|
| 4908 |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
| 4909 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
| 4910 |
|
|
| 4911 |
|
anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
| 4912 |
|
startline = (re->options & PCRE_STARTLINE) != 0; |
| 4913 |
|
|
| 4914 |
match_block.start_pattern = re->code; |
match_block.start_pattern = re->code; |
| 4915 |
match_block.start_subject = (const uschar *)subject; |
match_block.start_subject = (const uschar *)subject; |
| 4916 |
match_block.end_subject = match_block.start_subject + length; |
match_block.end_subject = match_block.start_subject + length; |
| 5039 |
{ |
{ |
| 5040 |
if (start_match > match_block.start_subject + start_offset) |
if (start_match > match_block.start_subject + start_offset) |
| 5041 |
{ |
{ |
| 5042 |
while (start_match < end_subject && start_match[-1] != '\n') |
while (start_match < end_subject && start_match[-1] != NEWLINE) |
| 5043 |
start_match++; |
start_match++; |
| 5044 |
} |
} |
| 5045 |
} |
} |