| 9 |
|
|
| 10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
| 11 |
|
|
| 12 |
Copyright (c) 1997 University of Cambridge |
Copyright (c) 1998 University of Cambridge |
| 13 |
|
|
| 14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
| 33 |
|
|
| 34 |
/* #define DEBUG */ |
/* #define DEBUG */ |
| 35 |
|
|
| 36 |
|
/* Use a macro for debugging printing, 'cause that eliminates the use |
| 37 |
|
of #ifdef inline, and there are *still* stupid compilers about that don't like |
| 38 |
|
indented pre-processor statements. I suppose it's only been 10 years... */ |
| 39 |
|
|
| 40 |
|
#ifdef DEBUG |
| 41 |
|
#define DPRINTF(p) printf p |
| 42 |
|
#else |
| 43 |
|
#define DPRINTF(p) /*nothing*/ |
| 44 |
|
#endif |
| 45 |
|
|
| 46 |
/* Include the internals header, which itself includes Standard C headers plus |
/* Include the internals header, which itself includes Standard C headers plus |
| 47 |
the external pcre header. */ |
the external pcre header. */ |
| 49 |
#include "internal.h" |
#include "internal.h" |
| 50 |
|
|
| 51 |
|
|
| 52 |
|
/* Allow compilation as C++ source code, should anybody want to do that. */ |
| 53 |
|
|
| 54 |
|
#ifdef __cplusplus |
| 55 |
|
#define class pcre_class |
| 56 |
|
#endif |
| 57 |
|
|
| 58 |
|
|
| 59 |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
| 60 |
|
|
| 61 |
static char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
| 62 |
static char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
static const char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
| 63 |
|
|
| 64 |
/* Text forms of OP_ values and things, for debugging */ |
/* Text forms of OP_ values and things, for debugging (not all used) */ |
| 65 |
|
|
| 66 |
#ifdef DEBUG |
#ifdef DEBUG |
| 67 |
static const char *OP_names[] = { |
static const char *OP_names[] = { |
| 72 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 73 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 74 |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
| 75 |
"class", "Ref", |
"class", "negclass", "Ref", |
| 76 |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
| 77 |
"Brazero", "Braminzero", "Bra" |
"Brazero", "Braminzero", "Bra" |
| 78 |
}; |
}; |
| 83 |
on. Zero means further processing is needed (for things like \x), or the escape |
on. Zero means further processing is needed (for things like \x), or the escape |
| 84 |
is invalid. */ |
is invalid. */ |
| 85 |
|
|
| 86 |
static short int escapes[] = { |
static const short int escapes[] = { |
| 87 |
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ |
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ |
| 88 |
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ |
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ |
| 89 |
'@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */ |
'@', -ESC_A, -ESC_B, 0, -ESC_D, 0, 0, 0, /* @ - G */ |
| 98 |
|
|
| 99 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
| 100 |
|
|
| 101 |
static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **); |
static BOOL |
| 102 |
|
compile_regex(int, int *, uschar **, const uschar **, const char **); |
| 103 |
|
|
| 104 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
| 105 |
doing the matching, so that they are thread-safe. */ |
doing the matching, so that they are thread-safe. */ |
| 204 |
Returns: nothing |
Returns: nothing |
| 205 |
*/ |
*/ |
| 206 |
|
|
| 207 |
static pchars(uschar *p, int length, BOOL is_subject, match_data *md) |
static void |
| 208 |
|
pchars(const uschar *p, int length, BOOL is_subject, match_data *md) |
| 209 |
{ |
{ |
| 210 |
int c; |
int c; |
| 211 |
if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
if (is_subject && length > md->end_subject - p) length = md->end_subject - p; |
| 314 |
/* Check a class or a back reference for a zero minimum */ |
/* Check a class or a back reference for a zero minimum */ |
| 315 |
|
|
| 316 |
case OP_CLASS: |
case OP_CLASS: |
| 317 |
|
case OP_NEGCLASS: |
| 318 |
case OP_REF: |
case OP_REF: |
| 319 |
cc += (*cc == OP_REF)? 2 : 33; |
cc += (*cc == OP_REF)? 2 : 33; |
| 320 |
|
|
| 679 |
|
|
| 680 |
case '[': |
case '[': |
| 681 |
previous = code; |
previous = code; |
|
*code++ = OP_CLASS; |
|
| 682 |
|
|
| 683 |
/* If the first character is '^', set the negation flag */ |
/* If the first character is '^', set the negation flag, and use a |
| 684 |
|
different opcode. This only matters if caseless matching is specified at |
| 685 |
|
runtime. */ |
| 686 |
|
|
| 687 |
if ((c = *(++ptr)) == '^') |
if ((c = *(++ptr)) == '^') |
| 688 |
{ |
{ |
| 689 |
negate_class = TRUE; |
negate_class = TRUE; |
| 690 |
|
*code++ = OP_NEGCLASS; |
| 691 |
c = *(++ptr); |
c = *(++ptr); |
| 692 |
} |
} |
| 693 |
else negate_class = FALSE; |
else |
| 694 |
|
{ |
| 695 |
|
negate_class = FALSE; |
| 696 |
|
*code++ = OP_CLASS; |
| 697 |
|
} |
| 698 |
|
|
| 699 |
/* Keep a count of chars so that we can optimize the case of just a single |
/* Keep a count of chars so that we can optimize the case of just a single |
| 700 |
character. */ |
character. */ |
| 1002 |
if (code == previous) code += 2; else previous[1]++; |
if (code == previous) code += 2; else previous[1]++; |
| 1003 |
} |
} |
| 1004 |
|
|
| 1005 |
/* Insert an UPTO if the max is greater than the min. */ |
/* If the maximum is unlimited, insert an OP_STAR. */ |
| 1006 |
|
|
| 1007 |
|
if (repeat_max < 0) |
| 1008 |
|
{ |
| 1009 |
|
*code++ = c; |
| 1010 |
|
*code++ = OP_STAR + repeat_type; |
| 1011 |
|
} |
| 1012 |
|
|
| 1013 |
|
/* Else insert an UPTO if the max is greater than the min. */ |
| 1014 |
|
|
| 1015 |
if (repeat_max != repeat_min) |
else if (repeat_max != repeat_min) |
| 1016 |
{ |
{ |
| 1017 |
*code++ = c; |
*code++ = c; |
| 1018 |
repeat_max -= repeat_min; |
repeat_max -= repeat_min; |
| 1030 |
/* If previous was a character class or a back reference, we put the repeat |
/* If previous was a character class or a back reference, we put the repeat |
| 1031 |
stuff after it. */ |
stuff after it. */ |
| 1032 |
|
|
| 1033 |
else if (*previous == OP_CLASS || *previous == OP_REF) |
else if (*previous == OP_CLASS || *previous == OP_NEGCLASS || |
| 1034 |
|
*previous == OP_REF) |
| 1035 |
{ |
{ |
| 1036 |
if (repeat_min == 0 && repeat_max == -1) |
if (repeat_min == 0 && repeat_max == -1) |
| 1037 |
*code++ = OP_CRSTAR + repeat_type; |
*code++ = OP_CRSTAR + repeat_type; |
| 1057 |
else if ((int)*previous >= OP_BRA) |
else if ((int)*previous >= OP_BRA) |
| 1058 |
{ |
{ |
| 1059 |
int i; |
int i; |
| 1060 |
int length = code - previous; |
int len = code - previous; |
| 1061 |
|
|
| 1062 |
if (repeat_max == -1 && could_be_empty(previous)) |
if (repeat_max == -1 && could_be_empty(previous)) |
| 1063 |
{ |
{ |
| 1074 |
{ |
{ |
| 1075 |
for (i = 1; i < repeat_min; i++) |
for (i = 1; i < repeat_min; i++) |
| 1076 |
{ |
{ |
| 1077 |
memcpy(code, previous, length); |
memcpy(code, previous, len); |
| 1078 |
code += length; |
code += len; |
| 1079 |
} |
} |
| 1080 |
} |
} |
| 1081 |
|
|
| 1087 |
{ |
{ |
| 1088 |
if (repeat_min == 0) |
if (repeat_min == 0) |
| 1089 |
{ |
{ |
| 1090 |
memmove(previous+1, previous, length); |
memmove(previous+1, previous, len); |
| 1091 |
code++; |
code++; |
| 1092 |
*previous++ = OP_BRAZERO + repeat_type; |
*previous++ = OP_BRAZERO + repeat_type; |
| 1093 |
} |
} |
| 1094 |
|
|
| 1095 |
for (i = 1; i < repeat_min; i++) |
for (i = 1; i < repeat_min; i++) |
| 1096 |
{ |
{ |
| 1097 |
memcpy(code, previous, length); |
memcpy(code, previous, len); |
| 1098 |
code += length; |
code += len; |
| 1099 |
} |
} |
| 1100 |
|
|
| 1101 |
for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++) |
for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++) |
| 1102 |
{ |
{ |
| 1103 |
*code++ = OP_BRAZERO + repeat_type; |
*code++ = OP_BRAZERO + repeat_type; |
| 1104 |
memcpy(code, previous, length); |
memcpy(code, previous, len); |
| 1105 |
code += length; |
code += len; |
| 1106 |
} |
} |
| 1107 |
} |
} |
| 1108 |
|
|
| 1303 |
the next state. */ |
the next state. */ |
| 1304 |
|
|
| 1305 |
previous[1] = length; |
previous[1] = length; |
| 1306 |
ptr--; |
if (length < 255) ptr--; |
| 1307 |
break; |
break; |
| 1308 |
} |
} |
| 1309 |
} /* end of big loop */ |
} /* end of big loop */ |
| 1563 |
return NULL; |
return NULL; |
| 1564 |
} |
} |
| 1565 |
|
|
| 1566 |
#ifdef DEBUG |
DPRINTF(("------------------------------------------------------------------\n")); |
| 1567 |
printf("------------------------------------------------------------------\n"); |
DPRINTF(("%s\n", pattern)); |
|
printf("%s\n", pattern); |
|
|
#endif |
|
| 1568 |
|
|
| 1569 |
/* The first thing to do is to make a pass over the pattern to compute the |
/* The first thing to do is to make a pass over the pattern to compute the |
| 1570 |
amount of store required to hold the compiled code. This does not have to be |
amount of store required to hold the compiled code. This does not have to be |
| 1679 |
{ |
{ |
| 1680 |
if (*ptr == '\\') |
if (*ptr == '\\') |
| 1681 |
{ |
{ |
| 1682 |
int c = check_escape(&ptr, errorptr, bracount, options, TRUE); |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE); |
| 1683 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 1684 |
if (-c == ESC_b) class_charcount++; else class_charcount = 10; |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
| 1685 |
} |
} |
| 1686 |
else class_charcount++; |
else class_charcount++; |
| 1687 |
ptr++; |
ptr++; |
| 1696 |
|
|
| 1697 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. */ |
| 1698 |
|
|
| 1699 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
| 1700 |
{ |
{ |
| 1701 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
| 1702 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 1804 |
continue; |
continue; |
| 1805 |
|
|
| 1806 |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
/* Handle ket. Look for subsequent max/min; for certain sets of values we |
| 1807 |
have to replicate this bracket up to that many times. */ |
have to replicate this bracket up to that many times. If brastackptr is |
| 1808 |
|
0 this is an unmatched bracket which will generate an error, but take care |
| 1809 |
|
not to try to access brastack[-1]. */ |
| 1810 |
|
|
| 1811 |
case ')': |
case ')': |
| 1812 |
length += 3; |
length += 3; |
| 1813 |
{ |
{ |
| 1814 |
int min = 1; |
int minval = 1; |
| 1815 |
int max = 1; |
int maxval = 1; |
| 1816 |
int duplength = length - brastack[--brastackptr]; |
int duplength = (brastackptr > 0)? length - brastack[--brastackptr] : 0; |
| 1817 |
|
|
| 1818 |
/* Leave ptr at the final char; for read_repeat_counts this happens |
/* Leave ptr at the final char; for read_repeat_counts this happens |
| 1819 |
automatically; for the others we need an increment. */ |
automatically; for the others we need an increment. */ |
| 1820 |
|
|
| 1821 |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
| 1822 |
{ |
{ |
| 1823 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr); |
| 1824 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 1825 |
} |
} |
| 1826 |
else if (c == '*') { min = 0; max = -1; ptr++; } |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
| 1827 |
else if (c == '+') { max = -1; ptr++; } |
else if (c == '+') { maxval = -1; ptr++; } |
| 1828 |
else if (c == '?') { min = 0; ptr++; } |
else if (c == '?') { minval = 0; ptr++; } |
| 1829 |
|
|
| 1830 |
/* If there is a minimum > 1 we have to replicate up to min-1 times; if |
/* If there is a minimum > 1 we have to replicate up to minval-1 times; |
| 1831 |
there is a limited maximum we have to replicate up to max-1 times and |
if there is a limited maximum we have to replicate up to maxval-1 times |
| 1832 |
allow for a BRAZERO item before each optional copy, as we also have to |
and allow for a BRAZERO item before each optional copy, as we also have |
| 1833 |
do before the first copy if the minimum is zero. */ |
to do before the first copy if the minimum is zero. */ |
| 1834 |
|
|
| 1835 |
if (min == 0) length++; |
if (minval == 0) length++; |
| 1836 |
else if (min > 1) length += (min - 1) * duplength; |
else if (minval > 1) length += (minval - 1) * duplength; |
| 1837 |
if (max > min) length += (max - min) * (duplength + 1); |
if (maxval > minval) length += (maxval - minval) * (duplength + 1); |
| 1838 |
} |
} |
|
|
|
| 1839 |
continue; |
continue; |
| 1840 |
|
|
| 1841 |
/* Non-special character. For a run of such characters the length required |
/* Non-special character. For a run of such characters the length required |
| 1896 |
} |
} |
| 1897 |
|
|
| 1898 |
/* Compute the size of data block needed and get it, either from malloc or |
/* Compute the size of data block needed and get it, either from malloc or |
| 1899 |
externally provided function. Put in the magic number and the options. */ |
externally provided function. We specify "code[0]" in the offsetof() expression |
| 1900 |
|
rather than just "code", because it has been reported that one broken compiler |
| 1901 |
|
fails on "code" because it is also an independent variable. It should make no |
| 1902 |
|
difference to the value of the offsetof(). */ |
| 1903 |
|
|
| 1904 |
size = length + offsetof(real_pcre, code); |
size = length + offsetof(real_pcre, code[0]); |
| 1905 |
re = (real_pcre *)(pcre_malloc)(size); |
re = (real_pcre *)(pcre_malloc)(size); |
| 1906 |
|
|
| 1907 |
if (re == NULL) |
if (re == NULL) |
| 1910 |
return NULL; |
return NULL; |
| 1911 |
} |
} |
| 1912 |
|
|
| 1913 |
|
/* Put in the magic number and the options. */ |
| 1914 |
|
|
| 1915 |
re->magic_number = MAGIC_NUMBER; |
re->magic_number = MAGIC_NUMBER; |
| 1916 |
re->options = options; |
re->options = options; |
| 1917 |
|
|
| 1962 |
re->options |= PCRE_ANCHORED; |
re->options |= PCRE_ANCHORED; |
| 1963 |
else |
else |
| 1964 |
{ |
{ |
| 1965 |
int c = find_firstchar(re->code); |
int ch = find_firstchar(re->code); |
| 1966 |
if (c >= 0) |
if (ch >= 0) |
| 1967 |
{ |
{ |
| 1968 |
re->first_char = c; |
re->first_char = ch; |
| 1969 |
re->options |= PCRE_FIRSTSET; |
re->options |= PCRE_FIRSTSET; |
| 1970 |
} |
} |
| 1971 |
else if (is_startline(re->code)) |
else if (is_startline(re->code)) |
| 2057 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 2058 |
if (isprint(c = code[3])) printf(" %c{", c); |
if (isprint(c = code[3])) printf(" %c{", c); |
| 2059 |
else printf(" \\x%02x{", c); |
else printf(" \\x%02x{", c); |
| 2060 |
if (*code != OP_EXACT) printf(","); |
if (*code != OP_EXACT) printf("0,"); |
| 2061 |
printf("%d}", (code[1] << 8) + code[2]); |
printf("%d}", (code[1] << 8) + code[2]); |
| 2062 |
if (*code == OP_MINUPTO) printf("?"); |
if (*code == OP_MINUPTO) printf("?"); |
| 2063 |
code += 3; |
code += 3; |
| 2102 |
|
|
| 2103 |
case OP_REF: |
case OP_REF: |
| 2104 |
printf(" \\%d", *(++code)); |
printf(" \\%d", *(++code)); |
| 2105 |
break; |
code ++; |
| 2106 |
|
goto CLASS_REF_REPEAT; |
| 2107 |
|
|
| 2108 |
case OP_CLASS: |
case OP_CLASS: |
| 2109 |
|
case OP_NEGCLASS: |
| 2110 |
{ |
{ |
| 2111 |
int i, min, max; |
int i, min, max; |
| 2112 |
|
|
| 2113 |
code++; |
if (*code++ == OP_CLASS) printf(" ["); |
| 2114 |
printf(" ["); |
else printf(" ^["); |
| 2115 |
|
|
| 2116 |
for (i = 0; i < 256; i++) |
for (i = 0; i < 256; i++) |
| 2117 |
{ |
{ |
| 2134 |
printf("]"); |
printf("]"); |
| 2135 |
code += 32; |
code += 32; |
| 2136 |
|
|
| 2137 |
|
CLASS_REF_REPEAT: |
| 2138 |
|
|
| 2139 |
switch(*code) |
switch(*code) |
| 2140 |
{ |
{ |
| 2141 |
case OP_CRSTAR: |
case OP_CRSTAR: |
| 2320 |
int number = (*ecode - OP_BRA) << 1; |
int number = (*ecode - OP_BRA) << 1; |
| 2321 |
int save_offset1 = 0, save_offset2 = 0; |
int save_offset1 = 0, save_offset2 = 0; |
| 2322 |
|
|
| 2323 |
#ifdef DEBUG |
DPRINTF(("start bracket %d\n", number/2)); |
|
printf("start bracket %d\n", number/2); |
|
|
#endif |
|
| 2324 |
|
|
| 2325 |
if (number > 0 && number < md->offset_end) |
if (number > 0 && number < md->offset_end) |
| 2326 |
{ |
{ |
| 2328 |
save_offset2 = md->offset_vector[number+1]; |
save_offset2 = md->offset_vector[number+1]; |
| 2329 |
md->offset_vector[number] = eptr - md->start_subject; |
md->offset_vector[number] = eptr - md->start_subject; |
| 2330 |
|
|
| 2331 |
#ifdef DEBUG |
DPRINTF(("saving %d %d\n", save_offset1, save_offset2)); |
|
printf("saving %d %d\n", save_offset1, save_offset2); |
|
|
#endif |
|
| 2332 |
} |
} |
| 2333 |
|
|
| 2334 |
/* Recurse for all the alternatives. */ |
/* Recurse for all the alternatives. */ |
| 2340 |
} |
} |
| 2341 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 2342 |
|
|
| 2343 |
#ifdef DEBUG |
DPRINTF(("bracket %d failed\n", number/2)); |
|
printf("bracket %d failed\n", number/2); |
|
|
#endif |
|
| 2344 |
|
|
| 2345 |
if (number > 0 && number < md->offset_end) |
if (number > 0 && number < md->offset_end) |
| 2346 |
{ |
{ |
| 2479 |
|
|
| 2480 |
number = (*prev - OP_BRA) << 1; |
number = (*prev - OP_BRA) << 1; |
| 2481 |
|
|
| 2482 |
#ifdef DEBUG |
DPRINTF(("end bracket %d\n", number/2)); |
|
printf("end bracket %d\n", number/2); |
|
|
#endif |
|
| 2483 |
|
|
| 2484 |
if (number > 0) |
if (number > 0) |
| 2485 |
{ |
{ |
| 2731 |
item to see if there is repeat information following. Then obey similar |
item to see if there is repeat information following. Then obey similar |
| 2732 |
code to character type repeats - written out again for speed. If caseless |
code to character type repeats - written out again for speed. If caseless |
| 2733 |
matching was set at runtime but not at compile time, we have to check both |
matching was set at runtime but not at compile time, we have to check both |
| 2734 |
versions of a character. */ |
versions of a character, and we have to behave differently for positive and |
| 2735 |
|
negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are |
| 2736 |
|
treated differently. */ |
| 2737 |
|
|
| 2738 |
case OP_CLASS: |
case OP_CLASS: |
| 2739 |
|
case OP_NEGCLASS: |
| 2740 |
{ |
{ |
| 2741 |
|
BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless; |
| 2742 |
const uschar *data = ecode + 1; /* Save for matching */ |
const uschar *data = ecode + 1; /* Save for matching */ |
| 2743 |
ecode += 33; /* Advance past the item */ |
ecode += 33; /* Advance past the item */ |
| 2744 |
|
|
| 2767 |
break; |
break; |
| 2768 |
|
|
| 2769 |
default: /* No repeat follows */ |
default: /* No repeat follows */ |
| 2770 |
if (eptr >= md->end_subject) return FALSE; |
min = max = 1; |
| 2771 |
c = *eptr++; |
break; |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
if (md->runtime_caseless) |
|
|
{ |
|
|
c = pcre_fcc[c]; |
|
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
} |
|
|
return FALSE; |
|
| 2772 |
} |
} |
| 2773 |
|
|
| 2774 |
/* First, ensure the minimum number of matches are present. */ |
/* First, ensure the minimum number of matches are present. */ |
| 2777 |
{ |
{ |
| 2778 |
if (eptr >= md->end_subject) return FALSE; |
if (eptr >= md->end_subject) return FALSE; |
| 2779 |
c = *eptr++; |
c = *eptr++; |
| 2780 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
| 2781 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
| 2782 |
|
runtime caseless, continue if either case is in the map. */ |
| 2783 |
|
|
| 2784 |
|
if (!nasty_case) |
| 2785 |
{ |
{ |
| 2786 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2787 |
|
if (md->runtime_caseless) |
| 2788 |
|
{ |
| 2789 |
|
c = pcre_fcc[c]; |
| 2790 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2791 |
|
} |
| 2792 |
|
} |
| 2793 |
|
|
| 2794 |
|
/* Runtime caseless and it was a negative class. Continue only if |
| 2795 |
|
both cases are in the map. */ |
| 2796 |
|
|
| 2797 |
|
else |
| 2798 |
|
{ |
| 2799 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
| 2800 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
| 2801 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2802 |
} |
} |
| 2803 |
|
|
| 2804 |
return FALSE; |
return FALSE; |
| 2805 |
} |
} |
| 2806 |
|
|
| 2819 |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
| 2820 |
if (i >= max || eptr >= md->end_subject) return FALSE; |
if (i >= max || eptr >= md->end_subject) return FALSE; |
| 2821 |
c = *eptr++; |
c = *eptr++; |
| 2822 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
| 2823 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
| 2824 |
|
runtime caseless, continue if either case is in the map. */ |
| 2825 |
|
|
| 2826 |
|
if (!nasty_case) |
| 2827 |
|
{ |
| 2828 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2829 |
|
if (md->runtime_caseless) |
| 2830 |
|
{ |
| 2831 |
|
c = pcre_fcc[c]; |
| 2832 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2833 |
|
} |
| 2834 |
|
} |
| 2835 |
|
|
| 2836 |
|
/* Runtime caseless and it was a negative class. Continue only if |
| 2837 |
|
both cases are in the map. */ |
| 2838 |
|
|
| 2839 |
|
else |
| 2840 |
{ |
{ |
| 2841 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
| 2842 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
| 2843 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2844 |
} |
} |
| 2845 |
|
|
| 2846 |
return FALSE; |
return FALSE; |
| 2847 |
} |
} |
| 2848 |
/* Control never gets here */ |
/* Control never gets here */ |
| 2857 |
{ |
{ |
| 2858 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
| 2859 |
c = *eptr; |
c = *eptr; |
| 2860 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
| 2861 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
| 2862 |
|
runtime caseless, continue if either case is in the map. */ |
| 2863 |
|
|
| 2864 |
|
if (!nasty_case) |
| 2865 |
|
{ |
| 2866 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2867 |
|
if (md->runtime_caseless) |
| 2868 |
|
{ |
| 2869 |
|
c = pcre_fcc[c]; |
| 2870 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2871 |
|
} |
| 2872 |
|
} |
| 2873 |
|
|
| 2874 |
|
/* Runtime caseless and it was a negative class. Continue only if |
| 2875 |
|
both cases are in the map. */ |
| 2876 |
|
|
| 2877 |
|
else |
| 2878 |
{ |
{ |
| 2879 |
|
if ((data[c/8] & (1 << (c&7))) == 0) break; |
| 2880 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
| 2881 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2882 |
} |
} |
| 2883 |
|
|
| 2884 |
break; |
break; |
| 2885 |
} |
} |
| 2886 |
|
|
| 2898 |
register int length = ecode[1]; |
register int length = ecode[1]; |
| 2899 |
ecode += 2; |
ecode += 2; |
| 2900 |
|
|
| 2901 |
#ifdef DEBUG |
#ifdef DEBUG /* Sigh. Some compilers never learn. */ |
| 2902 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 2903 |
printf("matching subject <null> against pattern "); |
printf("matching subject <null> against pattern "); |
| 2904 |
else |
else |
| 2909 |
} |
} |
| 2910 |
pchars(ecode, length, FALSE, md); |
pchars(ecode, length, FALSE, md); |
| 2911 |
printf("\n"); |
printf("\n"); |
| 2912 |
#endif |
#endif |
| 2913 |
|
|
| 2914 |
if (length > md->end_subject - eptr) return FALSE; |
if (length > md->end_subject - eptr) return FALSE; |
| 2915 |
if (md->caseless) |
if (md->caseless) |
| 2966 |
maximum. Alternatively, if maximizing, find the maximum number of |
maximum. Alternatively, if maximizing, find the maximum number of |
| 2967 |
characters and work backwards. */ |
characters and work backwards. */ |
| 2968 |
|
|
| 2969 |
#ifdef DEBUG |
DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max, |
| 2970 |
printf("matching %c{%d,%d} against subject %.*s\n", c, min, max, |
max, eptr)); |
|
max, eptr); |
|
|
#endif |
|
| 2971 |
|
|
| 2972 |
if (md->caseless) |
if (md->caseless) |
| 2973 |
{ |
{ |
| 3032 |
/* Match a negated single character */ |
/* Match a negated single character */ |
| 3033 |
|
|
| 3034 |
case OP_NOT: |
case OP_NOT: |
| 3035 |
if (eptr > md->end_subject) return FALSE; |
if (eptr >= md->end_subject) return FALSE; |
| 3036 |
ecode++; |
ecode++; |
| 3037 |
if (md->caseless) |
if (md->caseless) |
| 3038 |
{ |
{ |
| 3091 |
maximum. Alternatively, if maximizing, find the maximum number of |
maximum. Alternatively, if maximizing, find the maximum number of |
| 3092 |
characters and work backwards. */ |
characters and work backwards. */ |
| 3093 |
|
|
| 3094 |
#ifdef DEBUG |
DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, |
| 3095 |
printf("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, |
max, eptr)); |
|
max, eptr); |
|
|
#endif |
|
| 3096 |
|
|
| 3097 |
if (md->caseless) |
if (md->caseless) |
| 3098 |
{ |
{ |
| 3342 |
/* There's been some horrible disaster. */ |
/* There's been some horrible disaster. */ |
| 3343 |
|
|
| 3344 |
default: |
default: |
| 3345 |
#ifdef DEBUG |
DPRINTF(("Unknown opcode %d\n", *ecode)); |
|
printf("Unknown opcode %d\n", *ecode); |
|
|
#endif |
|
| 3346 |
md->errorcode = PCRE_ERROR_UNKNOWN_NODE; |
md->errorcode = PCRE_ERROR_UNKNOWN_NODE; |
| 3347 |
return FALSE; |
return FALSE; |
| 3348 |
} |
} |
| 3358 |
|
|
| 3359 |
|
|
| 3360 |
/************************************************* |
/************************************************* |
| 3361 |
|
* Segregate setjmp() * |
| 3362 |
|
*************************************************/ |
| 3363 |
|
|
| 3364 |
|
/* The -Wall option of gcc gives warnings for all local variables when setjmp() |
| 3365 |
|
is used, even if the coding conforms to the rules of ANSI C. To avoid this, we |
| 3366 |
|
hide it in a separate function. This is called only when PCRE_EXTRA is set, |
| 3367 |
|
since it's needed only for the extension \X option, and with any luck, a good |
| 3368 |
|
compiler will spot the tail recursion and compile it efficiently. |
| 3369 |
|
|
| 3370 |
|
Arguments: |
| 3371 |
|
eptr pointer in subject |
| 3372 |
|
ecode position in code |
| 3373 |
|
offset_top current top pointer |
| 3374 |
|
md pointer to "static" info for the match |
| 3375 |
|
|
| 3376 |
|
Returns: TRUE if matched |
| 3377 |
|
*/ |
| 3378 |
|
|
| 3379 |
|
static BOOL |
| 3380 |
|
match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top, |
| 3381 |
|
match_data *match_block) |
| 3382 |
|
{ |
| 3383 |
|
return setjmp(match_block->fail_env) == 0 && |
| 3384 |
|
match(eptr, ecode, offset_top, match_block); |
| 3385 |
|
} |
| 3386 |
|
|
| 3387 |
|
|
| 3388 |
|
|
| 3389 |
|
/************************************************* |
| 3390 |
* Execute a Regular Expression * |
* Execute a Regular Expression * |
| 3391 |
*************************************************/ |
*************************************************/ |
| 3392 |
|
|
| 3413 |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
| 3414 |
const char *subject, int length, int options, int *offsets, int offsetcount) |
const char *subject, int length, int options, int *offsets, int offsetcount) |
| 3415 |
{ |
{ |
| 3416 |
int resetcount; |
int resetcount, ocount; |
|
int ocount = offsetcount; |
|
| 3417 |
int first_char = -1; |
int first_char = -1; |
| 3418 |
match_data match_block; |
match_data match_block; |
| 3419 |
const uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
| 3420 |
const uschar *start_match = (uschar *)subject; |
const uschar *start_match = (const uschar *)subject; |
| 3421 |
const uschar *end_subject; |
const uschar *end_subject; |
| 3422 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
| 3423 |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
| 3424 |
|
BOOL using_temporary_offsets = FALSE; |
| 3425 |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
| 3426 |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
| 3427 |
|
|
| 3450 |
|
|
| 3451 |
/* If the expression has got more back references than the offsets supplied can |
/* If the expression has got more back references than the offsets supplied can |
| 3452 |
hold, we get a temporary bit of working store to use during the matching. |
hold, we get a temporary bit of working store to use during the matching. |
| 3453 |
Otherwise, we can use the vector supplied, rounding down the size of it to a |
Otherwise, we can use the vector supplied, rounding down its size to a multiple |
| 3454 |
multiple of 2. */ |
of 2. */ |
| 3455 |
|
|
| 3456 |
ocount &= (-2); |
ocount = offsetcount & (-2); |
| 3457 |
if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2) |
if (re->top_backref > 0 && re->top_backref >= ocount/2) |
| 3458 |
{ |
{ |
| 3459 |
ocount = re->top_backref * 2 + 2; |
ocount = re->top_backref * 2 + 2; |
| 3460 |
match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int)); |
match_block.offset_vector = (int *)(pcre_malloc)(ocount * sizeof(int)); |
| 3461 |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY; |
| 3462 |
#ifdef DEBUG |
using_temporary_offsets = TRUE; |
| 3463 |
printf("Got memory to hold back references\n"); |
DPRINTF(("Got memory to hold back references\n")); |
|
#endif |
|
| 3464 |
} |
} |
| 3465 |
else match_block.offset_vector = offsets; |
else match_block.offset_vector = offsets; |
| 3466 |
|
|
| 3513 |
|
|
| 3514 |
do |
do |
| 3515 |
{ |
{ |
| 3516 |
|
int rc; |
| 3517 |
register int *iptr = match_block.offset_vector; |
register int *iptr = match_block.offset_vector; |
| 3518 |
register int *iend = iptr + resetcount; |
register int *iend = iptr + resetcount; |
| 3519 |
|
|
| 3555 |
} |
} |
| 3556 |
} |
} |
| 3557 |
|
|
| 3558 |
#ifdef DEBUG |
#ifdef DEBUG /* Sigh. Some compilers never learn. */ |
| 3559 |
printf(">>>> Match against: "); |
printf(">>>> Match against: "); |
| 3560 |
pchars(start_match, end_subject - start_match, TRUE, &match_block); |
pchars(start_match, end_subject - start_match, TRUE, &match_block); |
| 3561 |
printf("\n"); |
printf("\n"); |
| 3562 |
#endif |
#endif |
| 3563 |
|
|
| 3564 |
/* When a match occurs, substrings will be set for all internal extractions; |
/* When a match occurs, substrings will be set for all internal extractions; |
| 3565 |
we just need to set up the whole thing as substring 0 before returning. If |
we just need to set up the whole thing as substring 0 before returning. If |
| 3569 |
if certain parts of the pattern were not used. |
if certain parts of the pattern were not used. |
| 3570 |
|
|
| 3571 |
Before starting the match, we have to set up a longjmp() target to enable |
Before starting the match, we have to set up a longjmp() target to enable |
| 3572 |
the "cut" operation to fail a match completely without backtracking. */ |
the "cut" operation to fail a match completely without backtracking. This |
| 3573 |
|
is done in a separate function to avoid compiler warnings. We need not do |
| 3574 |
|
it unless PCRE_EXTRA is set, since only in that case is the "cut" operation |
| 3575 |
|
enabled. */ |
| 3576 |
|
|
| 3577 |
if (setjmp(match_block.fail_env) == 0 && |
if ((re->options & PCRE_EXTRA) != 0) |
|
match(start_match, re->code, 2, &match_block)) |
|
| 3578 |
{ |
{ |
| 3579 |
int rc; |
if (!match_with_setjmp(start_match, re->code, 2, &match_block)) |
| 3580 |
|
continue; |
| 3581 |
if (ocount != offsetcount) |
} |
| 3582 |
{ |
else if (!match(start_match, re->code, 2, &match_block)) continue; |
|
if (offsetcount >= 4) |
|
|
{ |
|
|
memcpy(offsets + 2, match_block.offset_vector + 2, |
|
|
(offsetcount - 2) * sizeof(int)); |
|
|
#ifdef DEBUG |
|
|
printf("Copied offsets; freeing temporary memory\n"); |
|
|
#endif |
|
|
} |
|
|
if (match_block.end_offset_top > offsetcount) |
|
|
match_block.offset_overflow = TRUE; |
|
| 3583 |
|
|
| 3584 |
#ifdef DEBUG |
/* Copy the offset information from temporary store if necessary */ |
|
printf("Freeing temporary memory\n"); |
|
|
#endif |
|
| 3585 |
|
|
| 3586 |
(pcre_free)(match_block.offset_vector); |
if (using_temporary_offsets) |
| 3587 |
|
{ |
| 3588 |
|
if (offsetcount >= 4) |
| 3589 |
|
{ |
| 3590 |
|
memcpy(offsets + 2, match_block.offset_vector + 2, |
| 3591 |
|
(offsetcount - 2) * sizeof(int)); |
| 3592 |
|
DPRINTF(("Copied offsets from temporary memory\n")); |
| 3593 |
} |
} |
| 3594 |
|
if (match_block.end_offset_top > offsetcount) |
| 3595 |
|
match_block.offset_overflow = TRUE; |
| 3596 |
|
|
| 3597 |
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2; |
DPRINTF(("Freeing temporary memory\n")); |
| 3598 |
|
(pcre_free)(match_block.offset_vector); |
| 3599 |
|
} |
| 3600 |
|
|
| 3601 |
if (match_block.offset_end < 2) rc = 0; else |
rc = match_block.offset_overflow? 0 : match_block.end_offset_top/2; |
|
{ |
|
|
offsets[0] = start_match - match_block.start_subject; |
|
|
offsets[1] = match_block.end_match_ptr - match_block.start_subject; |
|
|
} |
|
| 3602 |
|
|
| 3603 |
#ifdef DEBUG |
if (match_block.offset_end < 2) rc = 0; else |
| 3604 |
printf(">>>> returning %d\n", rc); |
{ |
| 3605 |
#endif |
offsets[0] = start_match - match_block.start_subject; |
| 3606 |
return rc; |
offsets[1] = match_block.end_match_ptr - match_block.start_subject; |
| 3607 |
} |
} |
| 3608 |
|
|
| 3609 |
|
DPRINTF((">>>> returning %d\n", rc)); |
| 3610 |
|
return rc; |
| 3611 |
} |
} |
| 3612 |
while (!anchored && |
while (!anchored && |
| 3613 |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
| 3614 |
start_match++ < end_subject); |
start_match++ < end_subject); |
| 3615 |
|
|
| 3616 |
#ifdef DEBUG |
if (using_temporary_offsets) |
| 3617 |
printf(">>>> returning %d\n", match_block.errorcode); |
{ |
| 3618 |
#endif |
DPRINTF(("Freeing temporary memory\n")); |
| 3619 |
|
(pcre_free)(match_block.offset_vector); |
| 3620 |
|
} |
| 3621 |
|
|
| 3622 |
|
DPRINTF((">>>> returning %d\n", match_block.errorcode)); |
| 3623 |
|
|
| 3624 |
return match_block.errorcode; |
return match_block.errorcode; |
| 3625 |
} |
} |