| 73 |
Returns: the minimum length |
Returns: the minimum length |
| 74 |
-1 if \C was encountered |
-1 if \C was encountered |
| 75 |
-2 internal error (missing capturing bracket) |
-2 internal error (missing capturing bracket) |
| 76 |
|
-3 internal error (opcode not listed) |
| 77 |
*/ |
*/ |
| 78 |
|
|
| 79 |
static int |
static int |
| 192 |
case OP_NOT: |
case OP_NOT: |
| 193 |
case OP_NOTI: |
case OP_NOTI: |
| 194 |
case OP_PLUS: |
case OP_PLUS: |
| 195 |
|
case OP_PLUSI: |
| 196 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 197 |
|
case OP_MINPLUSI: |
| 198 |
case OP_POSPLUS: |
case OP_POSPLUS: |
| 199 |
|
case OP_POSPLUSI: |
| 200 |
case OP_NOTPLUS: |
case OP_NOTPLUS: |
| 201 |
|
case OP_NOTPLUSI: |
| 202 |
case OP_NOTMINPLUS: |
case OP_NOTMINPLUS: |
| 203 |
|
case OP_NOTMINPLUSI: |
| 204 |
case OP_NOTPOSPLUS: |
case OP_NOTPOSPLUS: |
| 205 |
|
case OP_NOTPOSPLUSI: |
| 206 |
branchlength++; |
branchlength++; |
| 207 |
cc += 2; |
cc += 2; |
| 208 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 221 |
need to skip over a multibyte character in UTF8 mode. */ |
need to skip over a multibyte character in UTF8 mode. */ |
| 222 |
|
|
| 223 |
case OP_EXACT: |
case OP_EXACT: |
| 224 |
|
case OP_EXACTI: |
| 225 |
case OP_NOTEXACT: |
case OP_NOTEXACT: |
| 226 |
|
case OP_NOTEXACTI: |
| 227 |
branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
| 228 |
cc += 4; |
cc += 4; |
| 229 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 406 |
of a character, we must take special action for UTF-8 characters. As it |
of a character, we must take special action for UTF-8 characters. As it |
| 407 |
happens, the "NOT" versions of these opcodes are used at present only for |
happens, the "NOT" versions of these opcodes are used at present only for |
| 408 |
ASCII characters, so they could be omitted from this list. However, in |
ASCII characters, so they could be omitted from this list. However, in |
| 409 |
future that may change, so we leave them in this special case. */ |
future that may change, so we include them here so as not to leave a |
| 410 |
|
gotcha for a future maintainer. */ |
| 411 |
|
|
| 412 |
case OP_UPTO: |
case OP_UPTO: |
| 413 |
case OP_UPTOI: |
case OP_UPTOI: |
| 466 |
cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE]; |
cc += _pcre_OP_lengths[op] + cc[1+LINK_SIZE]; |
| 467 |
break; |
break; |
| 468 |
|
|
| 469 |
/* For the record, these are the opcodes that are matched by "default": |
/* The remaining opcodes are just skipped over. */ |
|
OP_ACCEPT, OP_CLOSE, OP_COMMIT, OP_FAIL, OP_PRUNE, OP_SET_SOM, OP_SKIP, |
|
|
OP_THEN. */ |
|
| 470 |
|
|
| 471 |
default: |
case OP_ACCEPT: |
| 472 |
|
case OP_CLOSE: |
| 473 |
|
case OP_COMMIT: |
| 474 |
|
case OP_FAIL: |
| 475 |
|
case OP_PRUNE: |
| 476 |
|
case OP_SET_SOM: |
| 477 |
|
case OP_SKIP: |
| 478 |
|
case OP_THEN: |
| 479 |
cc += _pcre_OP_lengths[op]; |
cc += _pcre_OP_lengths[op]; |
| 480 |
break; |
break; |
| 481 |
|
|
| 482 |
|
/* This should not occur: we list all opcodes explicitly so that when |
| 483 |
|
new ones get added they are properly considered. */ |
| 484 |
|
|
| 485 |
|
default: |
| 486 |
|
return -3; |
| 487 |
} |
} |
| 488 |
} |
} |
| 489 |
/* Control never gets here */ |
/* Control never gets here */ |
| 629 |
Arguments: |
Arguments: |
| 630 |
code points to an expression |
code points to an expression |
| 631 |
start_bits points to a 32-byte table, initialized to 0 |
start_bits points to a 32-byte table, initialized to 0 |
|
caseless the current state of the caseless flag |
|
| 632 |
utf8 TRUE if in UTF-8 mode |
utf8 TRUE if in UTF-8 mode |
| 633 |
cd the block with char table pointers |
cd the block with char table pointers |
| 634 |
|
|
| 638 |
*/ |
*/ |
| 639 |
|
|
| 640 |
static int |
static int |
| 641 |
set_start_bits(const uschar *code, uschar *start_bits, BOOL caseless, |
set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8, |
| 642 |
BOOL utf8, compile_data *cd) |
compile_data *cd) |
| 643 |
{ |
{ |
| 644 |
register int c; |
register int c; |
| 645 |
int yield = SSB_DONE; |
int yield = SSB_DONE; |
| 688 |
case OP_SCBRA: |
case OP_SCBRA: |
| 689 |
case OP_ONCE: |
case OP_ONCE: |
| 690 |
case OP_ASSERT: |
case OP_ASSERT: |
| 691 |
rc = set_start_bits(tcode, start_bits, caseless, utf8, cd); |
rc = set_start_bits(tcode, start_bits, utf8, cd); |
| 692 |
if (rc == SSB_FAIL) return SSB_FAIL; |
if (rc == SSB_FAIL) return SSB_FAIL; |
| 693 |
if (rc == SSB_DONE) try_next = FALSE; else |
if (rc == SSB_DONE) try_next = FALSE; else |
| 694 |
{ |
{ |
| 733 |
|
|
| 734 |
case OP_BRAZERO: |
case OP_BRAZERO: |
| 735 |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
| 736 |
if (set_start_bits(++tcode, start_bits, caseless, utf8, cd) == SSB_FAIL) |
if (set_start_bits(++tcode, start_bits, utf8, cd) == SSB_FAIL) |
| 737 |
return SSB_FAIL; |
return SSB_FAIL; |
| 738 |
/* ========================================================================= |
/* ========================================================================= |
| 739 |
See the comment at the head of this function concerning the next line, |
See the comment at the head of this function concerning the next line, |
| 760 |
case OP_QUERY: |
case OP_QUERY: |
| 761 |
case OP_MINQUERY: |
case OP_MINQUERY: |
| 762 |
case OP_POSQUERY: |
case OP_POSQUERY: |
| 763 |
tcode = set_table_bit(start_bits, tcode + 1, caseless, cd, utf8); |
tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8); |
| 764 |
break; |
break; |
| 765 |
|
|
| 766 |
case OP_STARI: |
case OP_STARI: |
| 777 |
case OP_UPTO: |
case OP_UPTO: |
| 778 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 779 |
case OP_POSUPTO: |
case OP_POSUPTO: |
| 780 |
tcode = set_table_bit(start_bits, tcode + 3, caseless, cd, utf8); |
tcode = set_table_bit(start_bits, tcode + 3, FALSE, cd, utf8); |
| 781 |
break; |
break; |
| 782 |
|
|
| 783 |
case OP_UPTOI: |
case OP_UPTOI: |
| 795 |
case OP_PLUS: |
case OP_PLUS: |
| 796 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 797 |
case OP_POSPLUS: |
case OP_POSPLUS: |
| 798 |
(void)set_table_bit(start_bits, tcode + 1, caseless, cd, utf8); |
(void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8); |
| 799 |
try_next = FALSE; |
try_next = FALSE; |
| 800 |
break; |
break; |
| 801 |
|
|
| 802 |
|
case OP_EXACTI: |
| 803 |
|
tcode += 2; |
| 804 |
|
/* Fall through */ |
| 805 |
case OP_CHARI: |
case OP_CHARI: |
| 806 |
case OP_PLUSI: |
case OP_PLUSI: |
| 807 |
case OP_MINPLUSI: |
case OP_MINPLUSI: |
| 1143 |
/* See if we can find a fixed set of initial characters for the pattern. */ |
/* See if we can find a fixed set of initial characters for the pattern. */ |
| 1144 |
|
|
| 1145 |
memset(start_bits, 0, 32 * sizeof(uschar)); |
memset(start_bits, 0, 32 * sizeof(uschar)); |
| 1146 |
bits_set = set_start_bits(code, start_bits, |
bits_set = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, |
|
(re->options & PCRE_CASELESS) != 0, (re->options & PCRE_UTF8) != 0, |
|
| 1147 |
&compile_block) == SSB_DONE; |
&compile_block) == SSB_DONE; |
| 1148 |
} |
} |
| 1149 |
|
|
| 1150 |
/* Find the minimum length of subject string. */ |
/* Find the minimum length of subject string. */ |
| 1151 |
|
|
| 1152 |
min = find_minlength(code, code, re->options); |
switch(min = find_minlength(code, code, re->options)) |
| 1153 |
|
{ |
| 1154 |
|
case -2: *errorptr = "internal error: missing capturing bracket"; break; |
| 1155 |
|
case -3: *errorptr = "internal error: opcode not recognized"; break; |
| 1156 |
|
default: break; |
| 1157 |
|
} |
| 1158 |
|
|
| 1159 |
/* Return NULL if no optimization is possible. */ |
/* Return NULL if no optimization is possible. */ |
| 1160 |
|
|