| 241 |
compiling for Virtual Pascal, things are done differently (see pcre.in). */ |
compiling for Virtual Pascal, things are done differently (see pcre.in). */ |
| 242 |
|
|
| 243 |
#ifndef VPCOMPAT |
#ifndef VPCOMPAT |
| 244 |
|
#ifdef __cplusplus |
| 245 |
|
extern "C" void *(*pcre_malloc)(size_t) = malloc; |
| 246 |
|
extern "C" void (*pcre_free)(void *) = free; |
| 247 |
|
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL; |
| 248 |
|
#else |
| 249 |
void *(*pcre_malloc)(size_t) = malloc; |
void *(*pcre_malloc)(size_t) = malloc; |
| 250 |
void (*pcre_free)(void *) = free; |
void (*pcre_free)(void *) = free; |
| 251 |
int (*pcre_callout)(pcre_callout_block *) = NULL; |
int (*pcre_callout)(pcre_callout_block *) = NULL; |
| 252 |
#endif |
#endif |
| 253 |
|
#endif |
| 254 |
|
|
| 255 |
|
|
| 256 |
/************************************************* |
/************************************************* |
| 517 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
| 518 |
|
|
| 519 |
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) |
if (extra_data != NULL && (extra_data->flags & PCRE_EXTRA_STUDY_DATA) != 0) |
| 520 |
study = extra_data->study_data; |
study = (const pcre_study_data *)extra_data->study_data; |
| 521 |
|
|
| 522 |
switch (what) |
switch (what) |
| 523 |
{ |
{ |
| 598 |
switch (what) |
switch (what) |
| 599 |
{ |
{ |
| 600 |
case PCRE_CONFIG_UTF8: |
case PCRE_CONFIG_UTF8: |
| 601 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 602 |
*((int *)where) = 1; |
*((int *)where) = 1; |
| 603 |
#else |
#else |
| 604 |
*((int *)where) = 0; |
*((int *)where) = 0; |
| 605 |
#endif |
#endif |
| 606 |
break; |
break; |
| 607 |
|
|
| 608 |
case PCRE_CONFIG_NEWLINE: |
case PCRE_CONFIG_NEWLINE: |
| 675 |
bracount number of previous extracting brackets |
bracount number of previous extracting brackets |
| 676 |
options the options bits |
options the options bits |
| 677 |
isclass TRUE if inside a character class |
isclass TRUE if inside a character class |
|
cd pointer to char tables block |
|
| 678 |
|
|
| 679 |
Returns: zero or positive => a data character |
Returns: zero or positive => a data character |
| 680 |
negative => a special escape sequence |
negative => a special escape sequence |
| 683 |
|
|
| 684 |
static int |
static int |
| 685 |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
| 686 |
int options, BOOL isclass, compile_data *cd) |
int options, BOOL isclass) |
| 687 |
{ |
{ |
| 688 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
| 689 |
int c, i; |
int c, i; |
| 806 |
c = 0; |
c = 0; |
| 807 |
while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0) |
while (i++ < 2 && (digitab[ptr[1]] & ctype_xdigit) != 0) |
| 808 |
{ |
{ |
| 809 |
int cc = *(++ptr); |
int cc; /* Some compilers don't like ++ */ |
| 810 |
|
cc = *(++ptr); /* in initializers */ |
| 811 |
if (cc >= 'a') cc -= 32; /* Convert to upper case */ |
if (cc >= 'a') cc -= 32; /* Convert to upper case */ |
| 812 |
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); |
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); |
| 813 |
} |
} |
| 864 |
|
|
| 865 |
Arguments: |
Arguments: |
| 866 |
p pointer to the first char after '{' |
p pointer to the first char after '{' |
|
cd pointer to char tables block |
|
| 867 |
|
|
| 868 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
| 869 |
*/ |
*/ |
| 870 |
|
|
| 871 |
static BOOL |
static BOOL |
| 872 |
is_counted_repeat(const uschar *p, compile_data *cd) |
is_counted_repeat(const uschar *p) |
| 873 |
{ |
{ |
| 874 |
if ((digitab[*p++] && ctype_digit) == 0) return FALSE; |
if ((digitab[*p++] && ctype_digit) == 0) return FALSE; |
| 875 |
while ((digitab[*p] & ctype_digit) != 0) p++; |
while ((digitab[*p] & ctype_digit) != 0) p++; |
| 900 |
maxp pointer to int for max |
maxp pointer to int for max |
| 901 |
returned as -1 if no max |
returned as -1 if no max |
| 902 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
|
cd pointer to character tables block |
|
| 903 |
|
|
| 904 |
Returns: pointer to '}' on success; |
Returns: pointer to '}' on success; |
| 905 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
| 906 |
*/ |
*/ |
| 907 |
|
|
| 908 |
static const uschar * |
static const uschar * |
| 909 |
read_repeat_counts(const uschar *p, int *minp, int *maxp, |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
|
const char **errorptr, compile_data *cd) |
|
| 910 |
{ |
{ |
| 911 |
int min = 0; |
int min = 0; |
| 912 |
int max = -1; |
int max = -1; |
| 1796 |
|
|
| 1797 |
if (c == '\\') |
if (c == '\\') |
| 1798 |
{ |
{ |
| 1799 |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
| 1800 |
if (-c == ESC_b) c = '\b'; /* \b is backspace in a class */ |
if (-c == ESC_b) c = '\b'; /* \b is backspace in a class */ |
| 1801 |
|
|
| 1802 |
if (-c == ESC_Q) /* Handle start of quoted string */ |
if (-c == ESC_Q) /* Handle start of quoted string */ |
| 1885 |
if (d == '\\') |
if (d == '\\') |
| 1886 |
{ |
{ |
| 1887 |
const uschar *oldptr = ptr; |
const uschar *oldptr = ptr; |
| 1888 |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
| 1889 |
|
|
| 1890 |
/* \b is backspace; any other special means the '-' was literal */ |
/* \b is backspace; any other special means the '-' was literal */ |
| 1891 |
|
|
| 2094 |
/* Various kinds of repeat */ |
/* Various kinds of repeat */ |
| 2095 |
|
|
| 2096 |
case '{': |
case '{': |
| 2097 |
if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
| 2098 |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd); |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr); |
| 2099 |
if (*errorptr != NULL) goto FAILED; |
if (*errorptr != NULL) goto FAILED; |
| 2100 |
goto REPEAT; |
goto REPEAT; |
| 2101 |
|
|
| 3042 |
|
|
| 3043 |
case '\\': |
case '\\': |
| 3044 |
tempptr = ptr; |
tempptr = ptr; |
| 3045 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
| 3046 |
|
|
| 3047 |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
| 3048 |
are arranged to be the negation of the corresponding OP_values. For the |
are arranged to be the negation of the corresponding OP_values. For the |
| 3145 |
if (c == '\\') |
if (c == '\\') |
| 3146 |
{ |
{ |
| 3147 |
tempptr = ptr; |
tempptr = ptr; |
| 3148 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
| 3149 |
if (c < 0) { ptr = tempptr; break; } |
if (c < 0) { ptr = tempptr; break; } |
| 3150 |
|
|
| 3151 |
/* If a character is > 127 in UTF-8 mode, we have to turn it into |
/* If a character is > 127 in UTF-8 mode, we have to turn it into |
| 3730 |
|
|
| 3731 |
|
|
| 3732 |
|
|
| 3733 |
|
#ifdef SUPPORT_UTF8 |
| 3734 |
|
/************************************************* |
| 3735 |
|
* Validate a UTF-8 string * |
| 3736 |
|
*************************************************/ |
| 3737 |
|
|
| 3738 |
|
/* This function is called (optionally) at the start of compile or match, to |
| 3739 |
|
validate that a supposed UTF-8 string is actually valid. The early check means |
| 3740 |
|
that subsequent code can assume it is dealing with a valid string. The check |
| 3741 |
|
can be turned off for maximum performance, but then consequences of supplying |
| 3742 |
|
an invalid string are undefined. |
| 3743 |
|
|
| 3744 |
|
Arguments: |
| 3745 |
|
string points to the string |
| 3746 |
|
length length of string, or -1 if the string is zero-terminated |
| 3747 |
|
|
| 3748 |
|
Returns: < 0 if the string is a valid UTF-8 string |
| 3749 |
|
>= 0 otherwise; the value is the offset of the bad byte |
| 3750 |
|
*/ |
| 3751 |
|
|
| 3752 |
|
static int |
| 3753 |
|
valid_utf8(const uschar *string, int length) |
| 3754 |
|
{ |
| 3755 |
|
register const uschar *p; |
| 3756 |
|
|
| 3757 |
|
if (length < 0) |
| 3758 |
|
{ |
| 3759 |
|
for (p = string; *p != 0; p++); |
| 3760 |
|
length = p - string; |
| 3761 |
|
} |
| 3762 |
|
|
| 3763 |
|
for (p = string; length-- > 0; p++) |
| 3764 |
|
{ |
| 3765 |
|
int ab; |
| 3766 |
|
if (*p < 128) continue; |
| 3767 |
|
if ((*p & 0xc0) != 0xc0) return p - string; |
| 3768 |
|
ab = utf8_table4[*p & 0x3f]; /* Number of additional bytes */ |
| 3769 |
|
if (length < ab) return p - string; |
| 3770 |
|
while (ab-- > 0) |
| 3771 |
|
{ |
| 3772 |
|
if ((*(++p) & 0xc0) != 0x80) return p - string; |
| 3773 |
|
length--; |
| 3774 |
|
} |
| 3775 |
|
} |
| 3776 |
|
|
| 3777 |
|
return -1; |
| 3778 |
|
} |
| 3779 |
|
#endif |
| 3780 |
|
|
| 3781 |
|
|
| 3782 |
|
|
| 3783 |
/************************************************* |
/************************************************* |
| 3784 |
* Compile a Regular Expression * |
* Compile a Regular Expression * |
| 3785 |
*************************************************/ |
*************************************************/ |
| 3846 |
|
|
| 3847 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 3848 |
utf8 = (options & PCRE_UTF8) != 0; |
utf8 = (options & PCRE_UTF8) != 0; |
| 3849 |
|
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && |
| 3850 |
|
(*erroroffset = valid_utf8((uschar *)pattern, -1)) >= 0) |
| 3851 |
|
{ |
| 3852 |
|
*errorptr = ERR44; |
| 3853 |
|
return NULL; |
| 3854 |
|
} |
| 3855 |
#else |
#else |
| 3856 |
if ((options & PCRE_UTF8) != 0) |
if ((options & PCRE_UTF8) != 0) |
| 3857 |
{ |
{ |
| 3933 |
case '\\': |
case '\\': |
| 3934 |
{ |
{ |
| 3935 |
const uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
| 3936 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
| 3937 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 3938 |
if (c >= 0) |
if (c >= 0) |
| 3939 |
{ |
{ |
| 3969 |
if (refnum > compile_block.top_backref) |
if (refnum > compile_block.top_backref) |
| 3970 |
compile_block.top_backref = refnum; |
compile_block.top_backref = refnum; |
| 3971 |
length += 2; /* For single back reference */ |
length += 2; /* For single back reference */ |
| 3972 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
if (ptr[1] == '{' && is_counted_repeat(ptr+2)) |
| 3973 |
{ |
{ |
| 3974 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
| 3975 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 3976 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
| 3977 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
| 4001 |
class, or back reference. */ |
class, or back reference. */ |
| 4002 |
|
|
| 4003 |
case '{': |
case '{': |
| 4004 |
if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
| 4005 |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block); |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr); |
| 4006 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 4007 |
|
|
| 4008 |
/* These special cases just insert one extra opcode */ |
/* These special cases just insert one extra opcode */ |
| 4098 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 4099 |
int prevchar = ptr[-1]; |
int prevchar = ptr[-1]; |
| 4100 |
#endif |
#endif |
| 4101 |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE, |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE); |
|
&compile_block); |
|
| 4102 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 4103 |
|
|
| 4104 |
/* \b is backspace inside a class */ |
/* \b is backspace inside a class */ |
| 4209 |
|
|
| 4210 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. */ |
| 4211 |
|
|
| 4212 |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
| 4213 |
{ |
{ |
| 4214 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
| 4215 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 4216 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
| 4217 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
| 4563 |
/* Leave ptr at the final char; for read_repeat_counts this happens |
/* Leave ptr at the final char; for read_repeat_counts this happens |
| 4564 |
automatically; for the others we need an increment. */ |
automatically; for the others we need an increment. */ |
| 4565 |
|
|
| 4566 |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block)) |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
| 4567 |
{ |
{ |
| 4568 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
| 4569 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 4570 |
} |
} |
| 4571 |
else if (c == '*') { min = 0; max = -1; ptr++; } |
else if (c == '*') { min = 0; max = -1; ptr++; } |
| 4654 |
if (c == '\\') |
if (c == '\\') |
| 4655 |
{ |
{ |
| 4656 |
const uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
| 4657 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
|
&compile_block); |
|
| 4658 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 4659 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
| 4660 |
|
|
| 7364 |
{ |
{ |
| 7365 |
register unsigned int flags = extra_data->flags; |
register unsigned int flags = extra_data->flags; |
| 7366 |
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) |
if ((flags & PCRE_EXTRA_STUDY_DATA) != 0) |
| 7367 |
study = extra_data->study_data; |
study = (const pcre_study_data *)extra_data->study_data; |
| 7368 |
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) |
if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) |
| 7369 |
match_block.match_limit = extra_data->match_limit; |
match_block.match_limit = extra_data->match_limit; |
| 7370 |
if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) |
if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0) |
| 7397 |
match_block.lcc = re->tables + lcc_offset; |
match_block.lcc = re->tables + lcc_offset; |
| 7398 |
match_block.ctypes = re->tables + ctypes_offset; |
match_block.ctypes = re->tables + ctypes_offset; |
| 7399 |
|
|
| 7400 |
|
/* Check a UTF-8 string if required. Unfortunately there's no way of passing |
| 7401 |
|
back the character offset. */ |
| 7402 |
|
|
| 7403 |
|
#ifdef SUPPORT_UTF8 |
| 7404 |
|
if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && |
| 7405 |
|
valid_utf8((uschar *)subject, length) >= 0) |
| 7406 |
|
return PCRE_ERROR_BADUTF8; |
| 7407 |
|
#endif |
| 7408 |
|
|
| 7409 |
/* The ims options can vary during the matching as a result of the presence |
/* The ims options can vary during the matching as a result of the presence |
| 7410 |
of (?ims) items in the pattern. They are kept in a local variable so that |
of (?ims) items in the pattern. They are kept in a local variable so that |
| 7411 |
restoring at the exit of a group is easy. */ |
restoring at the exit of a group is easy. */ |