| 52 |
|
|
| 53 |
/* Returns from set_start_bits() */ |
/* Returns from set_start_bits() */ |
| 54 |
|
|
| 55 |
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE }; |
enum { SSB_FAIL, SSB_DONE, SSB_CONTINUE, SSB_UNKNOWN }; |
| 56 |
|
|
| 57 |
|
|
| 58 |
|
|
| 73 |
Returns: the minimum length |
Returns: the minimum length |
| 74 |
-1 if \C was encountered |
-1 if \C was encountered |
| 75 |
-2 internal error (missing capturing bracket) |
-2 internal error (missing capturing bracket) |
| 76 |
-3 internal error (opcode not listed) |
-3 internal error (opcode not listed) |
| 77 |
*/ |
*/ |
| 78 |
|
|
| 79 |
static int |
static int |
| 140 |
case OP_KET: |
case OP_KET: |
| 141 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 142 |
case OP_KETRMIN: |
case OP_KETRMIN: |
| 143 |
case OP_KETRPOS: |
case OP_KETRPOS: |
| 144 |
case OP_END: |
case OP_END: |
| 145 |
if (length < 0 || (!had_recurse && branchlength < length)) |
if (length < 0 || (!had_recurse && branchlength < length)) |
| 146 |
length = branchlength; |
length = branchlength; |
| 382 |
min = 0; |
min = 0; |
| 383 |
cc++; |
cc++; |
| 384 |
break; |
break; |
| 385 |
|
|
| 386 |
case OP_CRPLUS: |
case OP_CRPLUS: |
| 387 |
case OP_CRMINPLUS: |
case OP_CRMINPLUS: |
| 388 |
min = 1; |
min = 1; |
| 389 |
cc++; |
cc++; |
| 390 |
break; |
break; |
| 391 |
|
|
| 392 |
case OP_CRRANGE: |
case OP_CRRANGE: |
| 393 |
case OP_CRMINRANGE: |
case OP_CRMINRANGE: |
| 394 |
min = GET2(cc, 1); |
min = GET2(cc, 1); |
| 419 |
of a character, we must take special action for UTF-8 characters. As it |
of a character, we must take special action for UTF-8 characters. As it |
| 420 |
happens, the "NOT" versions of these opcodes are used at present only for |
happens, the "NOT" versions of these opcodes are used at present only for |
| 421 |
ASCII characters, so they could be omitted from this list. However, in |
ASCII characters, so they could be omitted from this list. However, in |
| 422 |
future that may change, so we include them here so as not to leave a |
future that may change, so we include them here so as not to leave a |
| 423 |
gotcha for a future maintainer. */ |
gotcha for a future maintainer. */ |
| 424 |
|
|
| 425 |
case OP_UPTO: |
case OP_UPTO: |
| 488 |
case OP_PRUNE: |
case OP_PRUNE: |
| 489 |
case OP_SET_SOM: |
case OP_SET_SOM: |
| 490 |
case OP_SKIP: |
case OP_SKIP: |
| 491 |
case OP_THEN: |
case OP_THEN: |
| 492 |
cc += _pcre_OP_lengths[op]; |
cc += _pcre_OP_lengths[op]; |
| 493 |
break; |
break; |
| 494 |
|
|
| 495 |
/* This should not occur: we list all opcodes explicitly so that when |
/* This should not occur: we list all opcodes explicitly so that when |
| 496 |
new ones get added they are properly considered. */ |
new ones get added they are properly considered. */ |
| 497 |
|
|
| 648 |
Returns: SSB_FAIL => Failed to find any starting bytes |
Returns: SSB_FAIL => Failed to find any starting bytes |
| 649 |
SSB_DONE => Found mandatory starting bytes |
SSB_DONE => Found mandatory starting bytes |
| 650 |
SSB_CONTINUE => Found optional starting bytes |
SSB_CONTINUE => Found optional starting bytes |
| 651 |
|
SSB_UNKNOWN => Hit an unrecognized opcode |
| 652 |
*/ |
*/ |
| 653 |
|
|
| 654 |
static int |
static int |
| 655 |
set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8, |
set_start_bits(const uschar *code, uschar *start_bits, BOOL utf8, |
| 656 |
compile_data *cd) |
compile_data *cd) |
| 657 |
{ |
{ |
| 658 |
register int c; |
register int c; |
| 680 |
{ |
{ |
| 681 |
BOOL try_next = TRUE; |
BOOL try_next = TRUE; |
| 682 |
const uschar *tcode = code + 1 + LINK_SIZE; |
const uschar *tcode = code + 1 + LINK_SIZE; |
| 683 |
|
|
| 684 |
if (*code == OP_CBRA || *code == OP_SCBRA || |
if (*code == OP_CBRA || *code == OP_SCBRA || |
| 685 |
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2; |
*code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += 2; |
| 686 |
|
|
| 689 |
int rc; |
int rc; |
| 690 |
switch(*tcode) |
switch(*tcode) |
| 691 |
{ |
{ |
| 692 |
/* Fail if we reach something we don't understand */ |
/* If we reach something we don't understand, it means a new opcode has |
| 693 |
|
been created that hasn't been added to this code. Hopefully this problem |
| 694 |
|
will be discovered during testing. */ |
| 695 |
|
|
| 696 |
default: |
default: |
| 697 |
|
return SSB_UNKNOWN; |
| 698 |
|
|
| 699 |
|
/* Fail for a valid opcode that implies no starting bits. */ |
| 700 |
|
|
| 701 |
|
case OP_ACCEPT: |
| 702 |
|
case OP_ALLANY: |
| 703 |
|
case OP_ANY: |
| 704 |
|
case OP_ANYBYTE: |
| 705 |
|
case OP_CIRC: |
| 706 |
|
case OP_CIRCM: |
| 707 |
|
case OP_CLOSE: |
| 708 |
|
case OP_COMMIT: |
| 709 |
|
case OP_COND: |
| 710 |
|
case OP_CREF: |
| 711 |
|
case OP_DEF: |
| 712 |
|
case OP_DOLL: |
| 713 |
|
case OP_DOLLM: |
| 714 |
|
case OP_END: |
| 715 |
|
case OP_EOD: |
| 716 |
|
case OP_EODN: |
| 717 |
|
case OP_EXTUNI: |
| 718 |
|
case OP_FAIL: |
| 719 |
|
case OP_MARK: |
| 720 |
|
case OP_NCREF: |
| 721 |
|
case OP_NOT: |
| 722 |
|
case OP_NOTEXACT: |
| 723 |
|
case OP_NOTEXACTI: |
| 724 |
|
case OP_NOTI: |
| 725 |
|
case OP_NOTMINPLUS: |
| 726 |
|
case OP_NOTMINPLUSI: |
| 727 |
|
case OP_NOTMINQUERY: |
| 728 |
|
case OP_NOTMINQUERYI: |
| 729 |
|
case OP_NOTMINSTAR: |
| 730 |
|
case OP_NOTMINSTARI: |
| 731 |
|
case OP_NOTMINUPTO: |
| 732 |
|
case OP_NOTMINUPTOI: |
| 733 |
|
case OP_NOTPLUS: |
| 734 |
|
case OP_NOTPLUSI: |
| 735 |
|
case OP_NOTPOSPLUS: |
| 736 |
|
case OP_NOTPOSPLUSI: |
| 737 |
|
case OP_NOTPOSQUERY: |
| 738 |
|
case OP_NOTPOSQUERYI: |
| 739 |
|
case OP_NOTPOSSTAR: |
| 740 |
|
case OP_NOTPOSSTARI: |
| 741 |
|
case OP_NOTPOSUPTO: |
| 742 |
|
case OP_NOTPOSUPTOI: |
| 743 |
|
case OP_NOTPROP: |
| 744 |
|
case OP_NOTQUERY: |
| 745 |
|
case OP_NOTQUERYI: |
| 746 |
|
case OP_NOTSTAR: |
| 747 |
|
case OP_NOTSTARI: |
| 748 |
|
case OP_NOTUPTO: |
| 749 |
|
case OP_NOTUPTOI: |
| 750 |
|
case OP_NOT_HSPACE: |
| 751 |
|
case OP_NOT_VSPACE: |
| 752 |
|
case OP_NOT_WORD_BOUNDARY: |
| 753 |
|
case OP_NRREF: |
| 754 |
|
case OP_PROP: |
| 755 |
|
case OP_PRUNE: |
| 756 |
|
case OP_PRUNE_ARG: |
| 757 |
|
case OP_RECURSE: |
| 758 |
|
case OP_REF: |
| 759 |
|
case OP_REFI: |
| 760 |
|
case OP_REVERSE: |
| 761 |
|
case OP_RREF: |
| 762 |
|
case OP_SCOND: |
| 763 |
|
case OP_SET_SOM: |
| 764 |
|
case OP_SKIP: |
| 765 |
|
case OP_SKIP_ARG: |
| 766 |
|
case OP_SOD: |
| 767 |
|
case OP_SOM: |
| 768 |
|
case OP_THEN: |
| 769 |
|
case OP_THEN_ARG: |
| 770 |
|
case OP_WORD_BOUNDARY: |
| 771 |
|
case OP_XCLASS: |
| 772 |
return SSB_FAIL; |
return SSB_FAIL; |
| 773 |
|
|
| 774 |
/* If we hit a bracket or a positive lookahead assertion, recurse to set |
/* If we hit a bracket or a positive lookahead assertion, recurse to set |
| 787 |
case OP_ONCE: |
case OP_ONCE: |
| 788 |
case OP_ASSERT: |
case OP_ASSERT: |
| 789 |
rc = set_start_bits(tcode, start_bits, utf8, cd); |
rc = set_start_bits(tcode, start_bits, utf8, cd); |
| 790 |
if (rc == SSB_FAIL) return SSB_FAIL; |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
| 791 |
if (rc == SSB_DONE) try_next = FALSE; else |
if (rc == SSB_DONE) try_next = FALSE; else |
| 792 |
{ |
{ |
| 793 |
do tcode += GET(tcode, 1); while (*tcode == OP_ALT); |
do tcode += GET(tcode, 1); while (*tcode == OP_ALT); |
| 810 |
case OP_KET: |
case OP_KET: |
| 811 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 812 |
case OP_KETRMIN: |
case OP_KETRMIN: |
| 813 |
case OP_KETRPOS: |
case OP_KETRPOS: |
| 814 |
return SSB_CONTINUE; |
return SSB_CONTINUE; |
| 815 |
|
|
| 816 |
/* Skip over callout */ |
/* Skip over callout */ |
| 833 |
case OP_BRAZERO: |
case OP_BRAZERO: |
| 834 |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
| 835 |
case OP_BRAPOSZERO: |
case OP_BRAPOSZERO: |
| 836 |
if (set_start_bits(++tcode, start_bits, utf8, cd) == SSB_FAIL) |
rc = set_start_bits(++tcode, start_bits, utf8, cd); |
| 837 |
return SSB_FAIL; |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
| 838 |
/* ========================================================================= |
/* ========================================================================= |
| 839 |
See the comment at the head of this function concerning the next line, |
See the comment at the head of this function concerning the next line, |
| 840 |
which was an old fudge for the benefit of OS/2. |
which was an old fudge for the benefit of OS/2. |
| 1136 |
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; |
for (c = 0; c < 32; c++) start_bits[c] |= tcode[c]; |
| 1137 |
} |
} |
| 1138 |
|
|
| 1139 |
/* Advance past the bit map, and act on what follows */ |
/* Advance past the bit map, and act on what follows. For a zero |
| 1140 |
|
minimum repeat, continue; otherwise stop processing. */ |
| 1141 |
|
|
| 1142 |
tcode += 32; |
tcode += 32; |
| 1143 |
switch (*tcode) |
switch (*tcode) |
| 1154 |
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; |
if (((tcode[1] << 8) + tcode[2]) == 0) tcode += 5; |
| 1155 |
else try_next = FALSE; |
else try_next = FALSE; |
| 1156 |
break; |
break; |
| 1157 |
|
|
| 1158 |
default: |
default: |
| 1159 |
try_next = FALSE; |
try_next = FALSE; |
| 1160 |
break; |
break; |
| 1173 |
|
|
| 1174 |
|
|
| 1175 |
|
|
| 1176 |
|
|
| 1177 |
|
|
| 1178 |
/************************************************* |
/************************************************* |
| 1179 |
* Study a compiled expression * |
* Study a compiled expression * |
| 1180 |
*************************************************/ |
*************************************************/ |
| 1231 |
if ((re->options & PCRE_ANCHORED) == 0 && |
if ((re->options & PCRE_ANCHORED) == 0 && |
| 1232 |
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0) |
(re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0) |
| 1233 |
{ |
{ |
| 1234 |
|
int rc; |
| 1235 |
|
|
| 1236 |
/* Set the character tables in the block that is passed around */ |
/* Set the character tables in the block that is passed around */ |
| 1237 |
|
|
| 1238 |
tables = re->tables; |
tables = re->tables; |
| 1248 |
/* See if we can find a fixed set of initial characters for the pattern. */ |
/* See if we can find a fixed set of initial characters for the pattern. */ |
| 1249 |
|
|
| 1250 |
memset(start_bits, 0, 32 * sizeof(uschar)); |
memset(start_bits, 0, 32 * sizeof(uschar)); |
| 1251 |
bits_set = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, |
rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0, |
| 1252 |
&compile_block) == SSB_DONE; |
&compile_block); |
| 1253 |
|
bits_set = rc == SSB_DONE; |
| 1254 |
|
if (rc == SSB_UNKNOWN) *errorptr = "internal error: opcode not recognized"; |
| 1255 |
} |
} |
| 1256 |
|
|
| 1257 |
/* Find the minimum length of subject string. */ |
/* Find the minimum length of subject string. */ |
| 1259 |
switch(min = find_minlength(code, code, re->options)) |
switch(min = find_minlength(code, code, re->options)) |
| 1260 |
{ |
{ |
| 1261 |
case -2: *errorptr = "internal error: missing capturing bracket"; break; |
case -2: *errorptr = "internal error: missing capturing bracket"; break; |
| 1262 |
case -3: *errorptr = "internal error: opcode not recognized"; break; |
case -3: *errorptr = "internal error: opcode not recognized"; break; |
| 1263 |
default: break; |
default: break; |
| 1264 |
} |
} |
| 1265 |
|
|
| 1266 |
/* Return NULL if no optimization is possible. */ |
/* Return NULL if there's been an error or if no optimization is possible. */ |
| 1267 |
|
|
| 1268 |
if (!bits_set && min < 0) return NULL; |
if (*errorptr != NULL || (!bits_set && min < 0)) return NULL; |
| 1269 |
|
|
| 1270 |
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in |
/* Get a pcre_extra block and a pcre_study_data block. The study data is put in |
| 1271 |
the latter, which is pointed to by the former, which may also get additional |
the latter, which is pointed to by the former, which may also get additional |