| 107 |
|
|
| 108 |
static BOOL |
static BOOL |
| 109 |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
| 110 |
BOOL, int); |
BOOL, int, compile_data *); |
|
|
|
|
/* Structure for passing "static" information around between the functions |
|
|
doing the matching, so that they are thread-safe. */ |
|
|
|
|
|
typedef struct match_data { |
|
|
int errorcode; /* As it says */ |
|
|
int *offset_vector; /* Offset vector */ |
|
|
int offset_end; /* One past the end */ |
|
|
int offset_max; /* The maximum usable for return data */ |
|
|
BOOL offset_overflow; /* Set if too many extractions */ |
|
|
BOOL notbol; /* NOTBOL flag */ |
|
|
BOOL noteol; /* NOTEOL flag */ |
|
|
BOOL endonly; /* Dollar not before final \n */ |
|
|
const uschar *start_subject; /* Start of the subject string */ |
|
|
const uschar *end_subject; /* End of the subject string */ |
|
|
const uschar *end_match_ptr; /* Subject position at end match */ |
|
|
int end_offset_top; /* Highwater mark at end of match */ |
|
|
} match_data; |
|
| 111 |
|
|
| 112 |
|
|
| 113 |
|
|
| 127 |
|
|
| 128 |
|
|
| 129 |
/************************************************* |
/************************************************* |
| 130 |
|
* Default character tables * |
| 131 |
|
*************************************************/ |
| 132 |
|
|
| 133 |
|
/* A default set of character tables is included in the PCRE binary. Its source |
| 134 |
|
is built by the maketables auxiliary program, which uses the default C ctypes |
| 135 |
|
functions, and put in the file chartables.c. These tables are used by PCRE |
| 136 |
|
whenever the caller of pcre_compile() does not provide an alternate set of |
| 137 |
|
tables. */ |
| 138 |
|
|
| 139 |
|
#include "chartables.c" |
| 140 |
|
|
| 141 |
|
|
| 142 |
|
|
| 143 |
|
/************************************************* |
| 144 |
* Return version string * |
* Return version string * |
| 145 |
*************************************************/ |
*************************************************/ |
| 146 |
|
|
| 233 |
bracount number of previous extracting brackets |
bracount number of previous extracting brackets |
| 234 |
options the options bits |
options the options bits |
| 235 |
isclass TRUE if inside a character class |
isclass TRUE if inside a character class |
| 236 |
|
cd pointer to char tables block |
| 237 |
|
|
| 238 |
Returns: zero or positive => a data character |
Returns: zero or positive => a data character |
| 239 |
negative => a special escape sequence |
negative => a special escape sequence |
| 242 |
|
|
| 243 |
static int |
static int |
| 244 |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
check_escape(const uschar **ptrptr, const char **errorptr, int bracount, |
| 245 |
int options, BOOL isclass) |
int options, BOOL isclass, compile_data *cd) |
| 246 |
{ |
{ |
| 247 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
| 248 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
| 285 |
{ |
{ |
| 286 |
oldptr = ptr; |
oldptr = ptr; |
| 287 |
c -= '0'; |
c -= '0'; |
| 288 |
while ((pcre_ctypes[ptr[1]] & ctype_digit) != 0) |
while ((cd->ctypes[ptr[1]] & ctype_digit) != 0) |
| 289 |
c = c * 10 + *(++ptr) - '0'; |
c = c * 10 + *(++ptr) - '0'; |
| 290 |
if (c < 10 || c <= bracount) |
if (c < 10 || c <= bracount) |
| 291 |
{ |
{ |
| 311 |
|
|
| 312 |
case '0': |
case '0': |
| 313 |
c -= '0'; |
c -= '0'; |
| 314 |
while(i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_digit) != 0 && |
while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && |
| 315 |
ptr[1] != '8' && ptr[1] != '9') |
ptr[1] != '8' && ptr[1] != '9') |
| 316 |
c = c * 8 + *(++ptr) - '0'; |
c = c * 8 + *(++ptr) - '0'; |
| 317 |
break; |
break; |
| 320 |
|
|
| 321 |
case 'x': |
case 'x': |
| 322 |
c = 0; |
c = 0; |
| 323 |
while (i++ < 2 && (pcre_ctypes[ptr[1]] & ctype_xdigit) != 0) |
while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) |
| 324 |
{ |
{ |
| 325 |
ptr++; |
ptr++; |
| 326 |
c = c * 16 + pcre_lcc[*ptr] - |
c = c * 16 + cd->lcc[*ptr] - |
| 327 |
(((pcre_ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
(((cd->ctypes[*ptr] & ctype_digit) != 0)? '0' : 'W'); |
| 328 |
} |
} |
| 329 |
break; |
break; |
| 330 |
|
|
| 338 |
|
|
| 339 |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
/* A letter is upper-cased; then the 0x40 bit is flipped */ |
| 340 |
|
|
| 341 |
if (c >= 'a' && c <= 'z') c = pcre_fcc[c]; |
if (c >= 'a' && c <= 'z') c = cd->fcc[c]; |
| 342 |
c ^= 0x40; |
c ^= 0x40; |
| 343 |
break; |
break; |
| 344 |
|
|
| 345 |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
| 346 |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
| 347 |
for Perl compatibility, it is a literal. */ |
for Perl compatibility, it is a literal. This code looks a bit odd, but |
| 348 |
|
there used to be some cases other than the default, and there may be again |
| 349 |
|
in future, so I haven't "optimized" it. */ |
| 350 |
|
|
| 351 |
default: |
default: |
| 352 |
if ((options & PCRE_EXTRA) != 0) switch(c) |
if ((options & PCRE_EXTRA) != 0) switch(c) |
| 376 |
|
|
| 377 |
Arguments: |
Arguments: |
| 378 |
p pointer to the first char after '{' |
p pointer to the first char after '{' |
| 379 |
|
cd pointer to char tables block |
| 380 |
|
|
| 381 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
| 382 |
*/ |
*/ |
| 383 |
|
|
| 384 |
static BOOL |
static BOOL |
| 385 |
is_counted_repeat(const uschar *p) |
is_counted_repeat(const uschar *p, compile_data *cd) |
| 386 |
{ |
{ |
| 387 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
| 388 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
| 389 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
| 390 |
|
|
| 391 |
if (*p++ != ',') return FALSE; |
if (*p++ != ',') return FALSE; |
| 392 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
| 393 |
|
|
| 394 |
if ((pcre_ctypes[*p++] & ctype_digit) == 0) return FALSE; |
if ((cd->ctypes[*p++] & ctype_digit) == 0) return FALSE; |
| 395 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) p++; |
while ((cd->ctypes[*p] & ctype_digit) != 0) p++; |
| 396 |
return (*p == '}'); |
return (*p == '}'); |
| 397 |
} |
} |
| 398 |
|
|
| 412 |
maxp pointer to int for max |
maxp pointer to int for max |
| 413 |
returned as -1 if no max |
returned as -1 if no max |
| 414 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
| 415 |
|
cd pointer to character tables block |
| 416 |
|
|
| 417 |
Returns: pointer to '}' on success; |
Returns: pointer to '}' on success; |
| 418 |
current ptr on error, with errorptr set |
current ptr on error, with errorptr set |
| 419 |
*/ |
*/ |
| 420 |
|
|
| 421 |
static const uschar * |
static const uschar * |
| 422 |
read_repeat_counts(const uschar *p, int *minp, int *maxp, const char **errorptr) |
read_repeat_counts(const uschar *p, int *minp, int *maxp, |
| 423 |
|
const char **errorptr, compile_data *cd) |
| 424 |
{ |
{ |
| 425 |
int min = 0; |
int min = 0; |
| 426 |
int max = -1; |
int max = -1; |
| 427 |
|
|
| 428 |
while ((pcre_ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
while ((cd->ctypes[*p] & ctype_digit) != 0) min = min * 10 + *p++ - '0'; |
| 429 |
|
|
| 430 |
if (*p == '}') max = min; else |
if (*p == '}') max = min; else |
| 431 |
{ |
{ |
| 432 |
if (*(++p) != '}') |
if (*(++p) != '}') |
| 433 |
{ |
{ |
| 434 |
max = 0; |
max = 0; |
| 435 |
while((pcre_ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
while((cd->ctypes[*p] & ctype_digit) != 0) max = max * 10 + *p++ - '0'; |
| 436 |
if (max < min) |
if (max < min) |
| 437 |
{ |
{ |
| 438 |
*errorptr = ERR4; |
*errorptr = ERR4; |
| 617 |
/* Scan the pattern, compiling it into the code vector. |
/* Scan the pattern, compiling it into the code vector. |
| 618 |
|
|
| 619 |
Arguments: |
Arguments: |
| 620 |
options the option bits |
options the option bits |
| 621 |
brackets points to number of brackets used |
brackets points to number of brackets used |
| 622 |
code points to the pointer to the current code point |
code points to the pointer to the current code point |
| 623 |
ptrptr points to the current pattern pointer |
ptrptr points to the current pattern pointer |
| 624 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
| 625 |
optchanged set to the value of the last OP_OPT item compiled |
optchanged set to the value of the last OP_OPT item compiled |
| 626 |
|
cd contains pointers to tables |
| 627 |
|
|
| 628 |
Returns: TRUE on success |
Returns: TRUE on success |
| 629 |
FALSE, with *errorptr set on error |
FALSE, with *errorptr set on error |
| 630 |
*/ |
*/ |
| 631 |
|
|
| 632 |
static BOOL |
static BOOL |
| 633 |
compile_branch(int options, int *brackets, uschar **codeptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
| 634 |
const uschar **ptrptr, const char **errorptr, int *optchanged) |
const uschar **ptrptr, const char **errorptr, int *optchanged, |
| 635 |
|
compile_data *cd) |
| 636 |
{ |
{ |
| 637 |
int repeat_type, op_type; |
int repeat_type, op_type; |
| 638 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
| 664 |
c = *ptr; |
c = *ptr; |
| 665 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
| 666 |
{ |
{ |
| 667 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
| 668 |
if (c == '#') |
if (c == '#') |
| 669 |
{ |
{ |
| 670 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
| 752 |
|
|
| 753 |
if (c == '\\') |
if (c == '\\') |
| 754 |
{ |
{ |
| 755 |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
c = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
| 756 |
if (-c == ESC_b) c = '\b'; |
if (-c == ESC_b) c = '\b'; |
| 757 |
else if (c < 0) |
else if (c < 0) |
| 758 |
{ |
{ |
| 759 |
|
register const uschar *cbits = cd->cbits; |
| 760 |
class_charcount = 10; |
class_charcount = 10; |
| 761 |
switch (-c) |
switch (-c) |
| 762 |
{ |
{ |
| 763 |
case ESC_d: |
case ESC_d: |
| 764 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_digit]; |
| 765 |
continue; |
continue; |
| 766 |
|
|
| 767 |
case ESC_D: |
case ESC_D: |
| 768 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_digit]; |
| 769 |
continue; |
continue; |
| 770 |
|
|
| 771 |
case ESC_w: |
case ESC_w: |
| 772 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
| 773 |
class[c] |= (pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]); |
| 774 |
continue; |
continue; |
| 775 |
|
|
| 776 |
case ESC_W: |
case ESC_W: |
| 777 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) |
| 778 |
class[c] |= ~(pcre_cbits[c] | pcre_cbits[c+cbit_word]); |
class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]); |
| 779 |
continue; |
continue; |
| 780 |
|
|
| 781 |
case ESC_s: |
case ESC_s: |
| 782 |
for (c = 0; c < 32; c++) class[c] |= pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_space]; |
| 783 |
continue; |
continue; |
| 784 |
|
|
| 785 |
case ESC_S: |
case ESC_S: |
| 786 |
for (c = 0; c < 32; c++) class[c] |= ~pcre_cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_space]; |
| 787 |
continue; |
continue; |
| 788 |
|
|
| 789 |
default: |
default: |
| 815 |
|
|
| 816 |
if (d == '\\') |
if (d == '\\') |
| 817 |
{ |
{ |
| 818 |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE); |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
| 819 |
if (d < 0) |
if (d < 0) |
| 820 |
{ |
{ |
| 821 |
if (d == -ESC_b) d = '\b'; else |
if (d == -ESC_b) d = '\b'; else |
| 837 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
| 838 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
| 839 |
{ |
{ |
| 840 |
int uc = pcre_fcc[c]; /* flip case */ |
int uc = cd->fcc[c]; /* flip case */ |
| 841 |
class[uc/8] |= (1 << (uc&7)); |
class[uc/8] |= (1 << (uc&7)); |
| 842 |
} |
} |
| 843 |
class_charcount++; /* in case a one-char range */ |
class_charcount++; /* in case a one-char range */ |
| 852 |
class [c/8] |= (1 << (c&7)); |
class [c/8] |= (1 << (c&7)); |
| 853 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
| 854 |
{ |
{ |
| 855 |
c = pcre_fcc[c]; /* flip case */ |
c = cd->fcc[c]; /* flip case */ |
| 856 |
class[c/8] |= (1 << (c&7)); |
class[c/8] |= (1 << (c&7)); |
| 857 |
} |
} |
| 858 |
class_charcount++; |
class_charcount++; |
| 899 |
/* Various kinds of repeat */ |
/* Various kinds of repeat */ |
| 900 |
|
|
| 901 |
case '{': |
case '{': |
| 902 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, cd)) goto NORMAL_CHAR; |
| 903 |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr); |
ptr = read_repeat_counts(ptr+1, &repeat_min, &repeat_max, errorptr, cd); |
| 904 |
if (*errorptr != NULL) goto FAILED; |
if (*errorptr != NULL) goto FAILED; |
| 905 |
goto REPEAT; |
goto REPEAT; |
| 906 |
|
|
| 1196 |
|
|
| 1197 |
case '(': |
case '(': |
| 1198 |
bravalue = OP_COND; /* Conditional group */ |
bravalue = OP_COND; /* Conditional group */ |
| 1199 |
if ((pcre_ctypes[*(++ptr)] & ctype_digit) != 0) |
if ((cd->ctypes[*(++ptr)] & ctype_digit) != 0) |
| 1200 |
{ |
{ |
| 1201 |
condref = *ptr - '0'; |
condref = *ptr - '0'; |
| 1202 |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
while (*(++ptr) != ')') condref = condref*10 + *ptr - '0'; |
| 1329 |
errorptr, /* Where to put an error message */ |
errorptr, /* Where to put an error message */ |
| 1330 |
(bravalue == OP_ASSERTBACK || |
(bravalue == OP_ASSERTBACK || |
| 1331 |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
| 1332 |
condref)) /* Condition reference number */ |
condref, /* Condition reference number */ |
| 1333 |
|
cd)) /* Tables block */ |
| 1334 |
goto FAILED; |
goto FAILED; |
| 1335 |
|
|
| 1336 |
/* At the end of compiling, code is still pointing to the start of the |
/* At the end of compiling, code is still pointing to the start of the |
| 1378 |
|
|
| 1379 |
case '\\': |
case '\\': |
| 1380 |
tempptr = ptr; |
tempptr = ptr; |
| 1381 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
| 1382 |
|
|
| 1383 |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
/* Handle metacharacters introduced by \. For ones like \d, the ESC_ values |
| 1384 |
are arranged to be the negation of the corresponding OP_values. For the |
are arranged to be the negation of the corresponding OP_values. For the |
| 1423 |
{ |
{ |
| 1424 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
| 1425 |
{ |
{ |
| 1426 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
| 1427 |
if (c == '#') |
if (c == '#') |
| 1428 |
{ |
{ |
| 1429 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
| 1439 |
if (c == '\\') |
if (c == '\\') |
| 1440 |
{ |
{ |
| 1441 |
tempptr = ptr; |
tempptr = ptr; |
| 1442 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
| 1443 |
if (c < 0) { ptr = tempptr; break; } |
if (c < 0) { ptr = tempptr; break; } |
| 1444 |
} |
} |
| 1445 |
|
|
| 1451 |
|
|
| 1452 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
| 1453 |
|
|
| 1454 |
while (length < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); |
| 1455 |
|
|
| 1456 |
/* Compute the length and set it in the data vector, and advance to |
/* Compute the length and set it in the data vector, and advance to |
| 1457 |
the next state. */ |
the next state. */ |
| 1496 |
errorptr -> pointer to error message |
errorptr -> pointer to error message |
| 1497 |
lookbehind TRUE if this is a lookbehind assertion |
lookbehind TRUE if this is a lookbehind assertion |
| 1498 |
condref > 0 for OPT_CREF setting at start of conditional group |
condref > 0 for OPT_CREF setting at start of conditional group |
| 1499 |
|
cd points to the data block with tables pointers |
| 1500 |
|
|
| 1501 |
Returns: TRUE on success |
Returns: TRUE on success |
| 1502 |
*/ |
*/ |
| 1503 |
|
|
| 1504 |
static BOOL |
static BOOL |
| 1505 |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
| 1506 |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref) |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, |
| 1507 |
|
compile_data *cd) |
| 1508 |
{ |
{ |
| 1509 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
| 1510 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
| 1551 |
|
|
| 1552 |
/* Now compile the branch */ |
/* Now compile the branch */ |
| 1553 |
|
|
| 1554 |
if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged)) |
if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd)) |
| 1555 |
{ |
{ |
| 1556 |
*ptrptr = ptr; |
*ptrptr = ptr; |
| 1557 |
return FALSE; |
return FALSE; |
| 1821 |
options various option bits |
options various option bits |
| 1822 |
errorptr pointer to pointer to error text |
errorptr pointer to pointer to error text |
| 1823 |
erroroffset ptr offset in pattern where error was detected |
erroroffset ptr offset in pattern where error was detected |
| 1824 |
|
tables pointer to character tables or NULL |
| 1825 |
|
|
| 1826 |
Returns: pointer to compiled data block, or NULL on error, |
Returns: pointer to compiled data block, or NULL on error, |
| 1827 |
with errorptr and erroroffset set |
with errorptr and erroroffset set |
| 1829 |
|
|
| 1830 |
pcre * |
pcre * |
| 1831 |
pcre_compile(const char *pattern, int options, const char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
| 1832 |
int *erroroffset) |
int *erroroffset, const unsigned char *tables) |
| 1833 |
{ |
{ |
| 1834 |
real_pcre *re; |
real_pcre *re; |
| 1835 |
int length = 3; /* For initial BRA plus length */ |
int length = 3; /* For initial BRA plus length */ |
| 1842 |
unsigned int brastackptr = 0; |
unsigned int brastackptr = 0; |
| 1843 |
uschar *code; |
uschar *code; |
| 1844 |
const uschar *ptr; |
const uschar *ptr; |
| 1845 |
|
compile_data compile_block; |
| 1846 |
int brastack[BRASTACK_SIZE]; |
int brastack[BRASTACK_SIZE]; |
| 1847 |
uschar bralenstack[BRASTACK_SIZE]; |
uschar bralenstack[BRASTACK_SIZE]; |
| 1848 |
|
|
| 1871 |
return NULL; |
return NULL; |
| 1872 |
} |
} |
| 1873 |
|
|
| 1874 |
|
/* Set up pointers to the individual character tables */ |
| 1875 |
|
|
| 1876 |
|
if (tables == NULL) tables = pcre_default_tables; |
| 1877 |
|
compile_block.lcc = tables + lcc_offset; |
| 1878 |
|
compile_block.fcc = tables + fcc_offset; |
| 1879 |
|
compile_block.cbits = tables + cbits_offset; |
| 1880 |
|
compile_block.ctypes = tables + ctypes_offset; |
| 1881 |
|
|
| 1882 |
|
/* Reflect pattern for debugging output */ |
| 1883 |
|
|
| 1884 |
DPRINTF(("------------------------------------------------------------------\n")); |
DPRINTF(("------------------------------------------------------------------\n")); |
| 1885 |
DPRINTF(("%s\n", pattern)); |
DPRINTF(("%s\n", pattern)); |
| 1886 |
|
|
| 1899 |
|
|
| 1900 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
| 1901 |
{ |
{ |
| 1902 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
| 1903 |
if (c == '#') |
if (c == '#') |
| 1904 |
{ |
{ |
| 1905 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
| 1917 |
case '\\': |
case '\\': |
| 1918 |
{ |
{ |
| 1919 |
const uschar *save_ptr = ptr; |
const uschar *save_ptr = ptr; |
| 1920 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, &compile_block); |
| 1921 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 1922 |
if (c >= 0) |
if (c >= 0) |
| 1923 |
{ |
{ |
| 1937 |
int refnum = -c - ESC_REF; |
int refnum = -c - ESC_REF; |
| 1938 |
if (refnum > top_backref) top_backref = refnum; |
if (refnum > top_backref) top_backref = refnum; |
| 1939 |
length++; /* For single back reference */ |
length++; /* For single back reference */ |
| 1940 |
if (ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
| 1941 |
{ |
{ |
| 1942 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
| 1943 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 1944 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
| 1945 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
| 1963 |
or back reference. */ |
or back reference. */ |
| 1964 |
|
|
| 1965 |
case '{': |
case '{': |
| 1966 |
if (!is_counted_repeat(ptr+1)) goto NORMAL_CHAR; |
if (!is_counted_repeat(ptr+1, &compile_block)) goto NORMAL_CHAR; |
| 1967 |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+1, &min, &max, errorptr, &compile_block); |
| 1968 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 1969 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
| 1970 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
| 1999 |
{ |
{ |
| 2000 |
if (*ptr == '\\') |
if (*ptr == '\\') |
| 2001 |
{ |
{ |
| 2002 |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE); |
int ch = check_escape(&ptr, errorptr, bracount, options, TRUE, |
| 2003 |
|
&compile_block); |
| 2004 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 2005 |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
if (-ch == ESC_b) class_charcount++; else class_charcount = 10; |
| 2006 |
} |
} |
| 2017 |
|
|
| 2018 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. */ |
| 2019 |
|
|
| 2020 |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2, &compile_block)) |
| 2021 |
{ |
{ |
| 2022 |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr); |
ptr = read_repeat_counts(ptr+2, &min, &max, errorptr, &compile_block); |
| 2023 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 2024 |
if ((min == 0 && (max == 1 || max == -1)) || |
if ((min == 0 && (max == 1 || max == -1)) || |
| 2025 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
| 2085 |
group. */ |
group. */ |
| 2086 |
|
|
| 2087 |
case '(': |
case '(': |
| 2088 |
if ((pcre_ctypes[ptr[3]] & ctype_digit) != 0) |
if ((compile_block.ctypes[ptr[3]] & ctype_digit) != 0) |
| 2089 |
{ |
{ |
| 2090 |
ptr += 4; |
ptr += 4; |
| 2091 |
length += 2; |
length += 2; |
| 2092 |
while ((pcre_ctypes[*ptr] & ctype_digit) != 0) ptr++; |
while ((compile_block.ctypes[*ptr] & ctype_digit) != 0) ptr++; |
| 2093 |
if (*ptr != ')') |
if (*ptr != ')') |
| 2094 |
{ |
{ |
| 2095 |
*errorptr = ERR26; |
*errorptr = ERR26; |
| 2258 |
/* Leave ptr at the final char; for read_repeat_counts this happens |
/* Leave ptr at the final char; for read_repeat_counts this happens |
| 2259 |
automatically; for the others we need an increment. */ |
automatically; for the others we need an increment. */ |
| 2260 |
|
|
| 2261 |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2)) |
if ((c = ptr[1]) == '{' && is_counted_repeat(ptr+2, &compile_block)) |
| 2262 |
{ |
{ |
| 2263 |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr); |
ptr = read_repeat_counts(ptr+2, &minval, &maxval, errorptr, |
| 2264 |
|
&compile_block); |
| 2265 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 2266 |
} |
} |
| 2267 |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
else if (c == '*') { minval = 0; maxval = -1; ptr++; } |
| 2292 |
{ |
{ |
| 2293 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
| 2294 |
{ |
{ |
| 2295 |
if ((pcre_ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
| 2296 |
if (c == '#') |
if (c == '#') |
| 2297 |
{ |
{ |
| 2298 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
while ((c = *(++ptr)) != 0 && c != '\n'); |
| 2306 |
if (c == '\\') |
if (c == '\\') |
| 2307 |
{ |
{ |
| 2308 |
const uschar *saveptr = ptr; |
const uschar *saveptr = ptr; |
| 2309 |
c = check_escape(&ptr, errorptr, bracount, options, FALSE); |
c = check_escape(&ptr, errorptr, bracount, options, FALSE, |
| 2310 |
|
&compile_block); |
| 2311 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 2312 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
| 2313 |
} |
} |
| 2319 |
|
|
| 2320 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
| 2321 |
|
|
| 2322 |
while (runlength < 255 && (pcre_ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (runlength < 255 && |
| 2323 |
|
(compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); |
| 2324 |
|
|
| 2325 |
ptr--; |
ptr--; |
| 2326 |
length += runlength; |
length += runlength; |
| 2355 |
|
|
| 2356 |
re->magic_number = MAGIC_NUMBER; |
re->magic_number = MAGIC_NUMBER; |
| 2357 |
re->options = options; |
re->options = options; |
| 2358 |
|
re->tables = tables; |
| 2359 |
|
|
| 2360 |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
/* Set up a starting, non-extracting bracket, then compile the expression. On |
| 2361 |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
error, *errorptr will be set non-NULL, so we don't need to look at the result |
| 2365 |
code = re->code; |
code = re->code; |
| 2366 |
*code = OP_BRA; |
*code = OP_BRA; |
| 2367 |
bracount = 0; |
bracount = 0; |
| 2368 |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1); |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, |
| 2369 |
|
&compile_block); |
| 2370 |
re->top_bracket = bracount; |
re->top_bracket = bracount; |
| 2371 |
re->top_backref = top_backref; |
re->top_backref = top_backref; |
| 2372 |
|
|
| 2663 |
|
|
| 2664 |
|
|
| 2665 |
/************************************************* |
/************************************************* |
|
* Match a character type * |
|
|
*************************************************/ |
|
|
|
|
|
/* Not used in all the places it might be as it's sometimes faster |
|
|
to put the code inline. |
|
|
|
|
|
Arguments: |
|
|
type the character type |
|
|
c the character |
|
|
dotall the dotall flag |
|
|
|
|
|
Returns: TRUE if character is of the type |
|
|
*/ |
|
|
|
|
|
static BOOL |
|
|
match_type(int type, int c, BOOL dotall) |
|
|
{ |
|
|
|
|
|
#ifdef DEBUG |
|
|
if (isprint(c)) printf("matching subject %c against ", c); |
|
|
else printf("matching subject \\x%02x against ", c); |
|
|
printf("%s\n", OP_names[type]); |
|
|
#endif |
|
|
|
|
|
switch(type) |
|
|
{ |
|
|
case OP_ANY: return dotall || c != '\n'; |
|
|
case OP_NOT_DIGIT: return (pcre_ctypes[c] & ctype_digit) == 0; |
|
|
case OP_DIGIT: return (pcre_ctypes[c] & ctype_digit) != 0; |
|
|
case OP_NOT_WHITESPACE: return (pcre_ctypes[c] & ctype_space) == 0; |
|
|
case OP_WHITESPACE: return (pcre_ctypes[c] & ctype_space) != 0; |
|
|
case OP_NOT_WORDCHAR: return (pcre_ctypes[c] & ctype_word) == 0; |
|
|
case OP_WORDCHAR: return (pcre_ctypes[c] & ctype_word) != 0; |
|
|
} |
|
|
return FALSE; |
|
|
} |
|
|
|
|
|
|
|
|
|
|
|
/************************************************* |
|
| 2666 |
* Match a back-reference * |
* Match a back-reference * |
| 2667 |
*************************************************/ |
*************************************************/ |
| 2668 |
|
|
| 2705 |
/* Separate the caseless case for speed */ |
/* Separate the caseless case for speed */ |
| 2706 |
|
|
| 2707 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 2708 |
{ while (length-- > 0) if (pcre_lcc[*p++] != pcre_lcc[*eptr++]) return FALSE; } |
{ |
| 2709 |
|
while (length-- > 0) |
| 2710 |
|
if (md->lcc[*p++] != md->lcc[*eptr++]) return FALSE; |
| 2711 |
|
} |
| 2712 |
else |
else |
| 2713 |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
{ while (length-- > 0) if (*p++ != *eptr++) return FALSE; } |
| 2714 |
|
|
| 3161 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
| 3162 |
{ |
{ |
| 3163 |
BOOL prev_is_word = (eptr != md->start_subject) && |
BOOL prev_is_word = (eptr != md->start_subject) && |
| 3164 |
((pcre_ctypes[eptr[-1]] & ctype_word) != 0); |
((md->ctypes[eptr[-1]] & ctype_word) != 0); |
| 3165 |
BOOL cur_is_word = (eptr < md->end_subject) && |
BOOL cur_is_word = (eptr < md->end_subject) && |
| 3166 |
((pcre_ctypes[*eptr] & ctype_word) != 0); |
((md->ctypes[*eptr] & ctype_word) != 0); |
| 3167 |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
| 3168 |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
| 3169 |
return FALSE; |
return FALSE; |
| 3180 |
break; |
break; |
| 3181 |
|
|
| 3182 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 3183 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) != 0) |
if (eptr >= md->end_subject || |
| 3184 |
|
(md->ctypes[*eptr++] & ctype_digit) != 0) |
| 3185 |
return FALSE; |
return FALSE; |
| 3186 |
ecode++; |
ecode++; |
| 3187 |
break; |
break; |
| 3188 |
|
|
| 3189 |
case OP_DIGIT: |
case OP_DIGIT: |
| 3190 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_digit) == 0) |
if (eptr >= md->end_subject || |
| 3191 |
|
(md->ctypes[*eptr++] & ctype_digit) == 0) |
| 3192 |
return FALSE; |
return FALSE; |
| 3193 |
ecode++; |
ecode++; |
| 3194 |
break; |
break; |
| 3195 |
|
|
| 3196 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 3197 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) != 0) |
if (eptr >= md->end_subject || |
| 3198 |
|
(md->ctypes[*eptr++] & ctype_space) != 0) |
| 3199 |
return FALSE; |
return FALSE; |
| 3200 |
ecode++; |
ecode++; |
| 3201 |
break; |
break; |
| 3202 |
|
|
| 3203 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
| 3204 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_space) == 0) |
if (eptr >= md->end_subject || |
| 3205 |
|
(md->ctypes[*eptr++] & ctype_space) == 0) |
| 3206 |
return FALSE; |
return FALSE; |
| 3207 |
ecode++; |
ecode++; |
| 3208 |
break; |
break; |
| 3209 |
|
|
| 3210 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 3211 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) != 0) |
if (eptr >= md->end_subject || |
| 3212 |
|
(md->ctypes[*eptr++] & ctype_word) != 0) |
| 3213 |
return FALSE; |
return FALSE; |
| 3214 |
ecode++; |
ecode++; |
| 3215 |
break; |
break; |
| 3216 |
|
|
| 3217 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 3218 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr++] & ctype_word) == 0) |
if (eptr >= md->end_subject || |
| 3219 |
|
(md->ctypes[*eptr++] & ctype_word) == 0) |
| 3220 |
return FALSE; |
return FALSE; |
| 3221 |
ecode++; |
ecode++; |
| 3222 |
break; |
break; |
| 3448 |
if (length > md->end_subject - eptr) return FALSE; |
if (length > md->end_subject - eptr) return FALSE; |
| 3449 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 3450 |
{ |
{ |
| 3451 |
while (length-- > 0) if (pcre_lcc[*ecode++] != pcre_lcc[*eptr++]) return FALSE; |
while (length-- > 0) |
| 3452 |
|
if (md->lcc[*ecode++] != md->lcc[*eptr++]) |
| 3453 |
|
return FALSE; |
| 3454 |
} |
} |
| 3455 |
else |
else |
| 3456 |
{ |
{ |
| 3507 |
|
|
| 3508 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 3509 |
{ |
{ |
| 3510 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
| 3511 |
for (i = 1; i <= min; i++) if (c != pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
| 3512 |
|
if (c != md->lcc[*eptr++]) return FALSE; |
| 3513 |
if (min == max) continue; |
if (min == max) continue; |
| 3514 |
if (minimize) |
if (minimize) |
| 3515 |
{ |
{ |
| 3517 |
{ |
{ |
| 3518 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
| 3519 |
return TRUE; |
return TRUE; |
| 3520 |
if (i >= max || eptr >= md->end_subject || c != pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
| 3521 |
|
c != md->lcc[*eptr++]) |
| 3522 |
return FALSE; |
return FALSE; |
| 3523 |
} |
} |
| 3524 |
/* Control never gets here */ |
/* Control never gets here */ |
| 3528 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
| 3529 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3530 |
{ |
{ |
| 3531 |
if (eptr >= md->end_subject || c != pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c != md->lcc[*eptr]) break; |
| 3532 |
eptr++; |
eptr++; |
| 3533 |
} |
} |
| 3534 |
while (eptr >= pp) |
while (eptr >= pp) |
| 3578 |
ecode++; |
ecode++; |
| 3579 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 3580 |
{ |
{ |
| 3581 |
if (pcre_lcc[*ecode++] == pcre_lcc[*eptr++]) return FALSE; |
if (md->lcc[*ecode++] == md->lcc[*eptr++]) return FALSE; |
| 3582 |
} |
} |
| 3583 |
else |
else |
| 3584 |
{ |
{ |
| 3638 |
|
|
| 3639 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 3640 |
{ |
{ |
| 3641 |
c = pcre_lcc[c]; |
c = md->lcc[c]; |
| 3642 |
for (i = 1; i <= min; i++) if (c == pcre_lcc[*eptr++]) return FALSE; |
for (i = 1; i <= min; i++) |
| 3643 |
|
if (c == md->lcc[*eptr++]) return FALSE; |
| 3644 |
if (min == max) continue; |
if (min == max) continue; |
| 3645 |
if (minimize) |
if (minimize) |
| 3646 |
{ |
{ |
| 3648 |
{ |
{ |
| 3649 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
| 3650 |
return TRUE; |
return TRUE; |
| 3651 |
if (i >= max || eptr >= md->end_subject || c == pcre_lcc[*eptr++]) |
if (i >= max || eptr >= md->end_subject || |
| 3652 |
|
c == md->lcc[*eptr++]) |
| 3653 |
return FALSE; |
return FALSE; |
| 3654 |
} |
} |
| 3655 |
/* Control never gets here */ |
/* Control never gets here */ |
| 3659 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
| 3660 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3661 |
{ |
{ |
| 3662 |
if (eptr >= md->end_subject || c == pcre_lcc[*eptr]) break; |
if (eptr >= md->end_subject || c == md->lcc[*eptr]) break; |
| 3663 |
eptr++; |
eptr++; |
| 3664 |
} |
} |
| 3665 |
while (eptr >= pp) |
while (eptr >= pp) |
| 3753 |
|
|
| 3754 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 3755 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 3756 |
if ((pcre_ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) != 0) return FALSE; |
| 3757 |
break; |
break; |
| 3758 |
|
|
| 3759 |
case OP_DIGIT: |
case OP_DIGIT: |
| 3760 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 3761 |
if ((pcre_ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_digit) == 0) return FALSE; |
| 3762 |
break; |
break; |
| 3763 |
|
|
| 3764 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 3765 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 3766 |
if ((pcre_ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) != 0) return FALSE; |
| 3767 |
break; |
break; |
| 3768 |
|
|
| 3769 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
| 3770 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 3771 |
if ((pcre_ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
if ((md->ctypes[*eptr++] & ctype_space) == 0) return FALSE; |
| 3772 |
break; |
break; |
| 3773 |
|
|
| 3774 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 3775 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) != 0) |
for (i = 1; i <= min; i++) |
| 3776 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) != 0) |
| 3777 |
|
return FALSE; |
| 3778 |
break; |
break; |
| 3779 |
|
|
| 3780 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 3781 |
for (i = 1; i <= min; i++) if ((pcre_ctypes[*eptr++] & ctype_word) == 0) |
for (i = 1; i <= min; i++) |
| 3782 |
return FALSE; |
if ((md->ctypes[*eptr++] & ctype_word) == 0) |
| 3783 |
|
return FALSE; |
| 3784 |
break; |
break; |
| 3785 |
} |
} |
| 3786 |
|
|
| 3789 |
if (min == max) continue; |
if (min == max) continue; |
| 3790 |
|
|
| 3791 |
/* If minimizing, we have to test the rest of the pattern before each |
/* If minimizing, we have to test the rest of the pattern before each |
| 3792 |
subsequent match, so inlining isn't much help; just use the function. */ |
subsequent match. */ |
| 3793 |
|
|
| 3794 |
if (minimize) |
if (minimize) |
| 3795 |
{ |
{ |
| 3796 |
for (i = min;; i++) |
for (i = min;; i++) |
| 3797 |
{ |
{ |
| 3798 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
| 3799 |
if (i >= max || eptr >= md->end_subject || |
if (i >= max || eptr >= md->end_subject) return FALSE; |
| 3800 |
!match_type(ctype, *eptr++, (ims & PCRE_DOTALL) != 0)) |
|
| 3801 |
return FALSE; |
c = *eptr++; |
| 3802 |
|
switch(ctype) |
| 3803 |
|
{ |
| 3804 |
|
case OP_ANY: |
| 3805 |
|
if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; |
| 3806 |
|
break; |
| 3807 |
|
|
| 3808 |
|
case OP_NOT_DIGIT: |
| 3809 |
|
if ((md->ctypes[c] & ctype_digit) != 0) return FALSE; |
| 3810 |
|
break; |
| 3811 |
|
|
| 3812 |
|
case OP_DIGIT: |
| 3813 |
|
if ((md->ctypes[c] & ctype_digit) == 0) return FALSE; |
| 3814 |
|
break; |
| 3815 |
|
|
| 3816 |
|
case OP_NOT_WHITESPACE: |
| 3817 |
|
if ((md->ctypes[c] & ctype_space) != 0) return FALSE; |
| 3818 |
|
break; |
| 3819 |
|
|
| 3820 |
|
case OP_WHITESPACE: |
| 3821 |
|
if ((md->ctypes[c] & ctype_space) == 0) return FALSE; |
| 3822 |
|
break; |
| 3823 |
|
|
| 3824 |
|
case OP_NOT_WORDCHAR: |
| 3825 |
|
if ((md->ctypes[c] & ctype_word) != 0) return FALSE; |
| 3826 |
|
break; |
| 3827 |
|
|
| 3828 |
|
case OP_WORDCHAR: |
| 3829 |
|
if ((md->ctypes[c] & ctype_word) == 0) return FALSE; |
| 3830 |
|
break; |
| 3831 |
|
} |
| 3832 |
} |
} |
| 3833 |
/* Control never gets here */ |
/* Control never gets here */ |
| 3834 |
} |
} |
| 3861 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 3862 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3863 |
{ |
{ |
| 3864 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) != 0) |
| 3865 |
break; |
break; |
| 3866 |
eptr++; |
eptr++; |
| 3867 |
} |
} |
| 3870 |
case OP_DIGIT: |
case OP_DIGIT: |
| 3871 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3872 |
{ |
{ |
| 3873 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_digit) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_digit) == 0) |
| 3874 |
break; |
break; |
| 3875 |
eptr++; |
eptr++; |
| 3876 |
} |
} |
| 3879 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 3880 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3881 |
{ |
{ |
| 3882 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) != 0) |
| 3883 |
break; |
break; |
| 3884 |
eptr++; |
eptr++; |
| 3885 |
} |
} |
| 3888 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
| 3889 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3890 |
{ |
{ |
| 3891 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_space) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_space) == 0) |
| 3892 |
break; |
break; |
| 3893 |
eptr++; |
eptr++; |
| 3894 |
} |
} |
| 3897 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 3898 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3899 |
{ |
{ |
| 3900 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) != 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) != 0) |
| 3901 |
break; |
break; |
| 3902 |
eptr++; |
eptr++; |
| 3903 |
} |
} |
| 3906 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 3907 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 3908 |
{ |
{ |
| 3909 |
if (eptr >= md->end_subject || (pcre_ctypes[*eptr] & ctype_word) == 0) |
if (eptr >= md->end_subject || (md->ctypes[*eptr] & ctype_word) == 0) |
| 3910 |
break; |
break; |
| 3911 |
eptr++; |
eptr++; |
| 3912 |
} |
} |
| 3996 |
|
|
| 3997 |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
| 3998 |
|
|
| 3999 |
|
match_block.lcc = re->tables + lcc_offset; |
| 4000 |
|
match_block.ctypes = re->tables + ctypes_offset; |
| 4001 |
|
|
| 4002 |
/* The ims options can vary during the matching as a result of the presence |
/* The ims options can vary during the matching as a result of the presence |
| 4003 |
of (?ims) items in the pattern. They are kept in a local variable so that |
of (?ims) items in the pattern. They are kept in a local variable so that |
| 4004 |
restoring at the exit of a group is easy. */ |
restoring at the exit of a group is easy. */ |
| 4044 |
if ((re->options & PCRE_FIRSTSET) != 0) |
if ((re->options & PCRE_FIRSTSET) != 0) |
| 4045 |
{ |
{ |
| 4046 |
first_char = re->first_char; |
first_char = re->first_char; |
| 4047 |
if ((ims & PCRE_CASELESS) != 0) first_char = pcre_lcc[first_char]; |
if ((ims & PCRE_CASELESS) != 0) first_char = match_block.lcc[first_char]; |
| 4048 |
} |
} |
| 4049 |
else |
else |
| 4050 |
if (!startline && extra != NULL && |
if (!startline && extra != NULL && |
| 4069 |
if (first_char >= 0) |
if (first_char >= 0) |
| 4070 |
{ |
{ |
| 4071 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 4072 |
while (start_match < end_subject && pcre_lcc[*start_match] != first_char) |
while (start_match < end_subject && |
| 4073 |
|
match_block.lcc[*start_match] != first_char) |
| 4074 |
start_match++; |
start_match++; |
| 4075 |
else |
else |
| 4076 |
while (start_match < end_subject && *start_match != first_char) |
while (start_match < end_subject && *start_match != first_char) |
| 4143 |
DPRINTF((">>>> returning %d\n", rc)); |
DPRINTF((">>>> returning %d\n", rc)); |
| 4144 |
return rc; |
return rc; |
| 4145 |
} |
} |
| 4146 |
|
|
| 4147 |
|
/* This "while" is the end of the "do" above */ |
| 4148 |
|
|
| 4149 |
while (!anchored && |
while (!anchored && |
| 4150 |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
match_block.errorcode == PCRE_ERROR_NOMATCH && |
| 4151 |
start_match++ < end_subject); |
start_match++ < end_subject); |