| 48 |
/* Text forms of OP_ values and things, for debugging */ |
/* Text forms of OP_ values and things, for debugging */ |
| 49 |
|
|
| 50 |
#ifdef DEBUG |
#ifdef DEBUG |
| 51 |
static char *OP_names[] = { "End", "\\A", "\\B", "\\b", "\\D", "\\d", |
static const char *OP_names[] = { |
| 52 |
|
"End", "\\A", "\\B", "\\b", "\\D", "\\d", |
| 53 |
"\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars", |
"\\S", "\\s", "\\W", "\\w", "Cut", "\\Z", "^", "$", "Any", "chars", |
| 54 |
"not", |
"not", |
| 55 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 82 |
|
|
| 83 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
| 84 |
|
|
| 85 |
static BOOL compile_regex(int, int *,uschar **,uschar **,char **); |
static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **); |
| 86 |
|
|
| 87 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
| 88 |
doing the matching, so that they are thread-safe. */ |
doing the matching, so that they are thread-safe. */ |
| 99 |
BOOL noteol; /* NOTEOL flag */ |
BOOL noteol; /* NOTEOL flag */ |
| 100 |
BOOL dotall; /* Dot matches any char */ |
BOOL dotall; /* Dot matches any char */ |
| 101 |
BOOL endonly; /* Dollar not before final \n */ |
BOOL endonly; /* Dollar not before final \n */ |
| 102 |
uschar *start_subject; /* Start of the subject string */ |
const uschar *start_subject; /* Start of the subject string */ |
| 103 |
uschar *end_subject; /* End of the subject string */ |
const uschar *end_subject; /* End of the subject string */ |
| 104 |
jmp_buf fail_env; /* Environment for longjmp() break out */ |
jmp_buf fail_env; /* Environment for longjmp() break out */ |
| 105 |
uschar *end_match_ptr; /* Subject position at end match */ |
const uschar *end_match_ptr; /* Subject position at end match */ |
| 106 |
int end_offset_top; /* Highwater mark at end of match */ |
int end_offset_top; /* Highwater mark at end of match */ |
| 107 |
} match_data; |
} match_data; |
| 108 |
|
|
| 127 |
* Return version string * |
* Return version string * |
| 128 |
*************************************************/ |
*************************************************/ |
| 129 |
|
|
| 130 |
char * |
const char * |
| 131 |
pcre_version(void) |
pcre_version(void) |
| 132 |
{ |
{ |
| 133 |
return PCRE_VERSION; |
return PCRE_VERSION; |
| 157 |
int |
int |
| 158 |
pcre_info(const pcre *external_re, int *optptr, int *first_char) |
pcre_info(const pcre *external_re, int *optptr, int *first_char) |
| 159 |
{ |
{ |
| 160 |
real_pcre *re = (real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
| 161 |
if (re == NULL) return PCRE_ERROR_NULL; |
if (re == NULL) return PCRE_ERROR_NULL; |
| 162 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
| 163 |
if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); |
if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); |
| 361 |
*/ |
*/ |
| 362 |
|
|
| 363 |
static int |
static int |
| 364 |
check_escape(uschar **ptrptr, char **errorptr, int bracount, int options, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
| 365 |
BOOL isclass) |
int options, BOOL isclass) |
| 366 |
{ |
{ |
| 367 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
| 368 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
| 369 |
int i; |
int i; |
| 370 |
|
|
| 383 |
|
|
| 384 |
else |
else |
| 385 |
{ |
{ |
| 386 |
uschar *oldptr; |
const uschar *oldptr; |
| 387 |
switch (c) |
switch (c) |
| 388 |
{ |
{ |
| 389 |
/* The handling of escape sequences consisting of a string of digits |
/* The handling of escape sequences consisting of a string of digits |
| 503 |
*/ |
*/ |
| 504 |
|
|
| 505 |
static BOOL |
static BOOL |
| 506 |
is_counted_repeat(uschar *p) |
is_counted_repeat(const uschar *p) |
| 507 |
{ |
{ |
| 508 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
| 509 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
| 538 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
| 539 |
*/ |
*/ |
| 540 |
|
|
| 541 |
static uschar * |
static const uschar * |
| 542 |
read_repeat_counts(uschar *p, int *minp, int *maxp, char **errorptr) |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
| 543 |
{ |
{ |
| 544 |
int min = 0; |
int min = 0; |
| 545 |
int max = -1; |
int max = -1; |
| 593 |
*/ |
*/ |
| 594 |
|
|
| 595 |
static BOOL |
static BOOL |
| 596 |
compile_branch(int options, int *brackets, uschar **codeptr, uschar **ptrptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
| 597 |
char **errorptr) |
const uschar **ptrptr, const char **errorptr) |
| 598 |
{ |
{ |
| 599 |
int repeat_type, op_type; |
int repeat_type, op_type; |
| 600 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
| 601 |
int bravalue, length; |
int bravalue, length; |
| 602 |
register int c; |
register int c; |
| 603 |
register uschar *code = *codeptr; |
register uschar *code = *codeptr; |
| 604 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
| 605 |
|
const uschar *oldptr; |
| 606 |
uschar *previous = NULL; |
uschar *previous = NULL; |
|
uschar *oldptr; |
|
| 607 |
uschar class[32]; |
uschar class[32]; |
| 608 |
|
|
| 609 |
/* Switch on next character until the end of the branch */ |
/* Switch on next character until the end of the branch */ |
| 698 |
/* Backslash may introduce a single character, or it may introduce one |
/* Backslash may introduce a single character, or it may introduce one |
| 699 |
of the specials, which just set a flag. Escaped items are checked for |
of the specials, which just set a flag. Escaped items are checked for |
| 700 |
validity in the pre-compiling pass. The sequence \b is a special case. |
validity in the pre-compiling pass. The sequence \b is a special case. |
| 701 |
Inside a class (and only there) it is treated as backslash. Elsewhere |
Inside a class (and only there) it is treated as backspace. Elsewhere |
| 702 |
it marks a word boundary. Other escapes have preset maps ready to |
it marks a word boundary. Other escapes have preset maps ready to |
| 703 |
or into the one we are building. We assume they have more than one |
or into the one we are building. We assume they have more than one |
| 704 |
character in them, so set class_count bigger than one. */ |
character in them, so set class_count bigger than one. */ |
| 1215 |
continue; |
continue; |
| 1216 |
} |
} |
| 1217 |
|
|
| 1218 |
/* Reset and fall through */ |
/* Data character: reset and fall through */ |
| 1219 |
|
|
| 1220 |
ptr = oldptr; |
ptr = oldptr; |
| 1221 |
c = '\\'; |
c = '\\'; |
| 1306 |
*/ |
*/ |
| 1307 |
|
|
| 1308 |
static BOOL |
static BOOL |
| 1309 |
compile_regex(int options, int *brackets, uschar **codeptr, uschar **ptrptr, |
compile_regex(int options, int *brackets, uschar **codeptr, |
| 1310 |
char **errorptr) |
const uschar **ptrptr, const char **errorptr) |
| 1311 |
{ |
{ |
| 1312 |
uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
| 1313 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
| 1314 |
uschar *start_bracket = code; |
uschar *start_bracket = code; |
| 1315 |
|
|
| 1375 |
*/ |
*/ |
| 1376 |
|
|
| 1377 |
static BOOL |
static BOOL |
| 1378 |
is_anchored(register uschar *code, BOOL multiline) |
is_anchored(register const uschar *code, BOOL multiline) |
| 1379 |
{ |
{ |
| 1380 |
do { |
do { |
| 1381 |
int op = (int)code[3]; |
int op = (int)code[3]; |
| 1404 |
*/ |
*/ |
| 1405 |
|
|
| 1406 |
static BOOL |
static BOOL |
| 1407 |
is_startline(uschar *code) |
is_startline(const uschar *code) |
| 1408 |
{ |
{ |
| 1409 |
do { |
do { |
| 1410 |
if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) |
if ((int)code[3] >= OP_BRA || code[3] == OP_ASSERT) |
| 1489 |
*/ |
*/ |
| 1490 |
|
|
| 1491 |
pcre * |
pcre * |
| 1492 |
pcre_compile(const char *pattern, int options, char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
| 1493 |
int *erroroffset) |
int *erroroffset) |
| 1494 |
{ |
{ |
| 1495 |
real_pcre *re; |
real_pcre *re; |
| 1499 |
int c, size; |
int c, size; |
| 1500 |
int bracount = 0; |
int bracount = 0; |
| 1501 |
int brastack[200]; |
int brastack[200]; |
|
int brastackptr = 0; |
|
| 1502 |
int top_backref = 0; |
int top_backref = 0; |
| 1503 |
uschar *code, *ptr; |
unsigned int brastackptr = 0; |
| 1504 |
|
uschar *code; |
| 1505 |
|
const uschar *ptr; |
| 1506 |
|
|
| 1507 |
#ifdef DEBUG |
#ifdef DEBUG |
| 1508 |
uschar *code_base, *code_end; |
uschar *code_base, *code_end; |
| 1541 |
if an "extended" flag setting appears late in the pattern. We can't be so |
if an "extended" flag setting appears late in the pattern. We can't be so |
| 1542 |
clever for #-comments. */ |
clever for #-comments. */ |
| 1543 |
|
|
| 1544 |
ptr = (uschar *)(pattern - 1); |
ptr = (const uschar *)(pattern - 1); |
| 1545 |
while ((c = *(++ptr)) != 0) |
while ((c = *(++ptr)) != 0) |
| 1546 |
{ |
{ |
| 1547 |
int min, max; |
int min, max; |
| 1568 |
|
|
| 1569 |
case '\\': |
case '\\': |
| 1570 |
{ |
{ |
| 1571 |
uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
| 1572 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
| 1573 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 1574 |
if (c >= 0) |
if (c >= 0) |
| 1833 |
|
|
| 1834 |
if (c == '\\') |
if (c == '\\') |
| 1835 |
{ |
{ |
| 1836 |
uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
| 1837 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
| 1838 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 1839 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
| 1881 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
| 1882 |
of the function here. */ |
of the function here. */ |
| 1883 |
|
|
| 1884 |
ptr = (uschar *)pattern; |
ptr = (const uschar *)pattern; |
| 1885 |
code = re->code; |
code = re->code; |
| 1886 |
*code = OP_BRA; |
*code = OP_BRA; |
| 1887 |
bracount = 0; |
bracount = 0; |
| 1908 |
{ |
{ |
| 1909 |
(pcre_free)(re); |
(pcre_free)(re); |
| 1910 |
PCRE_ERROR_RETURN: |
PCRE_ERROR_RETURN: |
| 1911 |
*erroroffset = ptr - (uschar *)pattern; |
*erroroffset = ptr - (const uschar *)pattern; |
| 1912 |
return NULL; |
return NULL; |
| 1913 |
} |
} |
| 1914 |
|
|
| 2206 |
*/ |
*/ |
| 2207 |
|
|
| 2208 |
static BOOL |
static BOOL |
| 2209 |
match_ref(int number, register uschar *eptr, int length, match_data *md) |
match_ref(int number, register const uschar *eptr, int length, match_data *md) |
| 2210 |
{ |
{ |
| 2211 |
uschar *p = md->start_subject + md->offset_vector[number]; |
const uschar *p = md->start_subject + md->offset_vector[number]; |
| 2212 |
|
|
| 2213 |
#ifdef DEBUG |
#ifdef DEBUG |
| 2214 |
if (eptr >= md->end_subject) |
if (eptr >= md->end_subject) |
| 2255 |
*/ |
*/ |
| 2256 |
|
|
| 2257 |
static BOOL |
static BOOL |
| 2258 |
match(register uschar *eptr, register uschar *ecode, int offset_top, |
match(register const uschar *eptr, register const uschar *ecode, int offset_top, |
| 2259 |
match_data *md) |
match_data *md) |
| 2260 |
{ |
{ |
| 2261 |
for (;;) |
for (;;) |
| 2263 |
int min, max, ctype; |
int min, max, ctype; |
| 2264 |
register int i; |
register int i; |
| 2265 |
register int c; |
register int c; |
| 2266 |
BOOL minimize; |
BOOL minimize = FALSE; |
| 2267 |
|
|
| 2268 |
/* Opening bracket. Check the alternative branches in turn, failing if none |
/* Opening bracket. Check the alternative branches in turn, failing if none |
| 2269 |
match. We have to set the start offset if required and there is space |
match. We have to set the start offset if required and there is space |
| 2276 |
if ((int)*ecode >= OP_BRA) |
if ((int)*ecode >= OP_BRA) |
| 2277 |
{ |
{ |
| 2278 |
int number = (*ecode - OP_BRA) << 1; |
int number = (*ecode - OP_BRA) << 1; |
| 2279 |
int save_offset1, save_offset2; |
int save_offset1 = 0, save_offset2 = 0; |
| 2280 |
|
|
| 2281 |
#ifdef DEBUG |
#ifdef DEBUG |
| 2282 |
printf("start bracket %d\n", number/2); |
printf("start bracket %d\n", number/2); |
| 2403 |
|
|
| 2404 |
case OP_BRAZERO: |
case OP_BRAZERO: |
| 2405 |
{ |
{ |
| 2406 |
uschar *next = ecode+1; |
const uschar *next = ecode+1; |
| 2407 |
if (match(eptr, next, offset_top, md)) return TRUE; |
if (match(eptr, next, offset_top, md)) return TRUE; |
| 2408 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
| 2409 |
ecode = next + 3; |
ecode = next + 3; |
| 2412 |
|
|
| 2413 |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
| 2414 |
{ |
{ |
| 2415 |
uschar *next = ecode+1; |
const uschar *next = ecode+1; |
| 2416 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
| 2417 |
if (match(eptr, next+3, offset_top, md)) return TRUE; |
if (match(eptr, next+3, offset_top, md)) return TRUE; |
| 2418 |
ecode++; |
ecode++; |
| 2428 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 2429 |
{ |
{ |
| 2430 |
int number; |
int number; |
| 2431 |
uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
| 2432 |
|
|
| 2433 |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || *prev == OP_ONCE) |
| 2434 |
{ |
{ |
| 2677 |
|
|
| 2678 |
else |
else |
| 2679 |
{ |
{ |
| 2680 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
| 2681 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 2682 |
{ |
{ |
| 2683 |
if (!match_ref(number, eptr, length, md)) break; |
if (!match_ref(number, eptr, length, md)) break; |
| 2701 |
|
|
| 2702 |
case OP_CLASS: |
case OP_CLASS: |
| 2703 |
{ |
{ |
| 2704 |
uschar *data = ecode + 1; /* Save for matching */ |
const uschar *data = ecode + 1; /* Save for matching */ |
| 2705 |
ecode += 33; /* Advance past the item */ |
ecode += 33; /* Advance past the item */ |
| 2706 |
|
|
| 2707 |
switch (*ecode) |
switch (*ecode) |
| 2708 |
{ |
{ |
| 2785 |
|
|
| 2786 |
else |
else |
| 2787 |
{ |
{ |
| 2788 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
| 2789 |
for (i = min; i < max; eptr++, i++) |
for (i = min; i < max; eptr++, i++) |
| 2790 |
{ |
{ |
| 2791 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
| 2903 |
} |
} |
| 2904 |
else |
else |
| 2905 |
{ |
{ |
| 2906 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
| 2907 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 2908 |
{ |
{ |
| 2909 |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
| 2933 |
} |
} |
| 2934 |
else |
else |
| 2935 |
{ |
{ |
| 2936 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
| 2937 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 2938 |
{ |
{ |
| 2939 |
if (eptr >= md->end_subject || c != *eptr) break; |
if (eptr >= md->end_subject || c != *eptr) break; |
| 3030 |
} |
} |
| 3031 |
else |
else |
| 3032 |
{ |
{ |
| 3033 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
| 3034 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3035 |
{ |
{ |
| 3036 |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
| 3060 |
} |
} |
| 3061 |
else |
else |
| 3062 |
{ |
{ |
| 3063 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
| 3064 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3065 |
{ |
{ |
| 3066 |
if (eptr >= md->end_subject || c == *eptr) break; |
if (eptr >= md->end_subject || c == *eptr) break; |
| 3177 |
|
|
| 3178 |
else |
else |
| 3179 |
{ |
{ |
| 3180 |
uschar *pp = eptr; |
const uschar *pp = eptr; |
| 3181 |
switch(ctype) |
switch(ctype) |
| 3182 |
{ |
{ |
| 3183 |
case OP_ANY: |
case OP_ANY: |
| 3309 |
int ocount = offsetcount; |
int ocount = offsetcount; |
| 3310 |
int first_char = -1; |
int first_char = -1; |
| 3311 |
match_data match_block; |
match_data match_block; |
| 3312 |
uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
| 3313 |
uschar *start_match = (uschar *)subject; |
const uschar *start_match = (uschar *)subject; |
| 3314 |
uschar *end_subject; |
const uschar *end_subject; |
| 3315 |
real_pcre *re = (real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
| 3316 |
real_pcre_extra *extra = (real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
| 3317 |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0; |
| 3318 |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
BOOL startline = (re->options & PCRE_STARTLINE) != 0; |
| 3319 |
|
|
| 3323 |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
| 3324 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
| 3325 |
|
|
| 3326 |
match_block.start_subject = (uschar *)subject; |
match_block.start_subject = (const uschar *)subject; |
| 3327 |
match_block.end_subject = match_block.start_subject + length; |
match_block.end_subject = match_block.start_subject + length; |
| 3328 |
end_subject = match_block.end_subject; |
end_subject = match_block.end_subject; |
| 3329 |
|
|