| 82 |
int recurse_depth) |
int recurse_depth) |
| 83 |
{ |
{ |
| 84 |
int length = -1; |
int length = -1; |
| 85 |
BOOL utf8 = (options & PCRE_UTF8) != 0; |
/* PCRE_UTF16 has the same value as PCRE_UTF8. */ |
| 86 |
|
BOOL utf = (options & PCRE_UTF8) != 0; |
| 87 |
BOOL had_recurse = FALSE; |
BOOL had_recurse = FALSE; |
| 88 |
register int branchlength = 0; |
register int branchlength = 0; |
| 89 |
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; |
register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE; |
| 224 |
case OP_NOTPOSPLUSI: |
case OP_NOTPOSPLUSI: |
| 225 |
branchlength++; |
branchlength++; |
| 226 |
cc += 2; |
cc += 2; |
| 227 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
| 228 |
if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f]; |
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
| 229 |
#endif |
#endif |
| 230 |
break; |
break; |
| 231 |
|
|
| 245 |
case OP_NOTEXACTI: |
case OP_NOTEXACTI: |
| 246 |
branchlength += GET2(cc,1); |
branchlength += GET2(cc,1); |
| 247 |
cc += 2 + IMM2_SIZE; |
cc += 2 + IMM2_SIZE; |
| 248 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
| 249 |
if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f]; |
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
| 250 |
#endif |
#endif |
| 251 |
break; |
break; |
| 252 |
|
|
| 293 |
appear, but leave the code, just in case.) */ |
appear, but leave the code, just in case.) */ |
| 294 |
|
|
| 295 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 296 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
| 297 |
if (utf8) return -1; |
if (utf) return -1; |
| 298 |
#endif |
#endif |
| 299 |
branchlength++; |
branchlength++; |
| 300 |
cc++; |
cc++; |
| 323 |
|
|
| 324 |
/* Check a class for variable quantification */ |
/* Check a class for variable quantification */ |
| 325 |
|
|
| 326 |
#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8 |
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
| 327 |
case OP_XCLASS: |
case OP_XCLASS: |
| 328 |
cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS]; |
cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS]; |
| 329 |
/* Fall through */ |
/* Fall through */ |
| 375 |
case OP_REFI: |
case OP_REFI: |
| 376 |
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
if ((options & PCRE_JAVASCRIPT_COMPAT) == 0) |
| 377 |
{ |
{ |
| 378 |
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf8, GET2(cc, 1)); |
ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1)); |
| 379 |
if (cs == NULL) return -2; |
if (cs == NULL) return -2; |
| 380 |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
do ce += GET(ce, 1); while (*ce == OP_ALT); |
| 381 |
if (cc > cs && cc < ce) |
if (cc > cs && cc < ce) |
| 486 |
case OP_NOTPOSQUERYI: |
case OP_NOTPOSQUERYI: |
| 487 |
|
|
| 488 |
cc += PRIV(OP_lengths)[op]; |
cc += PRIV(OP_lengths)[op]; |
| 489 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
| 490 |
if (utf8 && cc[-1] >= 0xc0) cc += PRIV(utf8_table4)[cc[-1] & 0x3f]; |
if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]); |
| 491 |
#endif |
#endif |
| 492 |
break; |
break; |
| 493 |
|
|
| 538 |
p points to the character |
p points to the character |
| 539 |
caseless the caseless flag |
caseless the caseless flag |
| 540 |
cd the block with char table pointers |
cd the block with char table pointers |
| 541 |
utf8 TRUE for UTF-8 mode |
utf TRUE for UTF-8 / UTF-16 mode |
| 542 |
|
|
| 543 |
Returns: pointer after the character |
Returns: pointer after the character |
| 544 |
*/ |
*/ |
| 545 |
|
|
| 546 |
static const pcre_uchar * |
static const pcre_uchar * |
| 547 |
set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, |
set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless, |
| 548 |
compile_data *cd, BOOL utf8) |
compile_data *cd, BOOL utf) |
| 549 |
{ |
{ |
| 550 |
unsigned int c = *p; |
unsigned int c = *p; |
| 551 |
|
|
| 552 |
|
#ifdef COMPILE_PCRE8 |
| 553 |
SET_BIT(c); |
SET_BIT(c); |
| 554 |
|
|
| 555 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF |
| 556 |
if (utf8 && c > 127) |
if (utf && c > 127) |
| 557 |
{ |
{ |
| 558 |
GETCHARINC(c, p); |
GETCHARINC(c, p); |
| 559 |
#ifdef SUPPORT_UCP |
#ifdef SUPPORT_UCP |
| 560 |
if (caseless) |
if (caseless) |
| 561 |
{ |
{ |
| 562 |
pcre_uint8 buff[8]; |
pcre_uchar buff[6]; |
| 563 |
c = UCD_OTHERCASE(c); |
c = UCD_OTHERCASE(c); |
| 564 |
(void)PRIV(ord2utf8)(c, buff); |
(void)PRIV(ord2utf)(c, buff); |
| 565 |
SET_BIT(buff[0]); |
SET_BIT(buff[0]); |
| 566 |
} |
} |
| 567 |
#endif |
#endif |
| 573 |
|
|
| 574 |
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
| 575 |
return p + 1; |
return p + 1; |
| 576 |
|
#endif |
| 577 |
|
|
| 578 |
|
#ifdef COMPILE_PCRE16 |
| 579 |
|
if (c > 0xff) |
| 580 |
|
c = 0xff; |
| 581 |
|
SET_BIT(c); |
| 582 |
|
|
| 583 |
|
#ifdef SUPPORT_UTF |
| 584 |
|
if (utf && c > 127) |
| 585 |
|
{ |
| 586 |
|
GETCHARINC(c, p); |
| 587 |
|
#ifdef SUPPORT_UCP |
| 588 |
|
if (caseless) |
| 589 |
|
{ |
| 590 |
|
c = UCD_OTHERCASE(c); |
| 591 |
|
if (c > 0xff) |
| 592 |
|
c = 0xff; |
| 593 |
|
SET_BIT(c); |
| 594 |
|
} |
| 595 |
|
#endif |
| 596 |
|
return p; |
| 597 |
|
} |
| 598 |
|
#endif |
| 599 |
|
|
| 600 |
|
if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]); |
| 601 |
|
return p + 1; |
| 602 |
|
#endif |
| 603 |
} |
} |
| 604 |
|
|
| 605 |
|
|
| 630 |
{ |
{ |
| 631 |
register int c; |
register int c; |
| 632 |
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; |
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; |
| 633 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| 634 |
if (table_limit == 32) return; |
if (table_limit == 32) return; |
| 635 |
for (c = 128; c < 256; c++) |
for (c = 128; c < 256; c++) |
| 636 |
{ |
{ |
| 637 |
if ((cd->cbits[c/8] & (1 << (c&7))) != 0) |
if ((cd->cbits[c/8] & (1 << (c&7))) != 0) |
| 638 |
{ |
{ |
| 639 |
pcre_uint8 buff[8]; |
pcre_uchar buff[6]; |
| 640 |
(void)PRIV(ord2utf8)(c, buff); |
(void)PRIV(ord2utf)(c, buff); |
| 641 |
SET_BIT(buff[0]); |
SET_BIT(buff[0]); |
| 642 |
} |
} |
| 643 |
} |
} |
| 672 |
{ |
{ |
| 673 |
register int c; |
register int c; |
| 674 |
for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; |
for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; |
| 675 |
|
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| 676 |
if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; |
if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; |
| 677 |
|
#endif |
| 678 |
} |
} |
| 679 |
|
|
| 680 |
|
|
| 694 |
Arguments: |
Arguments: |
| 695 |
code points to an expression |
code points to an expression |
| 696 |
start_bits points to a 32-byte table, initialized to 0 |
start_bits points to a 32-byte table, initialized to 0 |
| 697 |
utf8 TRUE if in UTF-8 mode |
utf TRUE if in UTF-8 / UTF-16 mode |
| 698 |
cd the block with char table pointers |
cd the block with char table pointers |
| 699 |
|
|
| 700 |
Returns: SSB_FAIL => Failed to find any starting bytes |
Returns: SSB_FAIL => Failed to find any starting bytes |
| 704 |
*/ |
*/ |
| 705 |
|
|
| 706 |
static int |
static int |
| 707 |
set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf8, |
set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf, |
| 708 |
compile_data *cd) |
compile_data *cd) |
| 709 |
{ |
{ |
| 710 |
register int c; |
register int c; |
| 711 |
int yield = SSB_DONE; |
int yield = SSB_DONE; |
| 712 |
int table_limit = utf8? 16:32; |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| 713 |
|
int table_limit = utf? 16:32; |
| 714 |
|
#else |
| 715 |
|
int table_limit = 32; |
| 716 |
|
#endif |
| 717 |
|
|
| 718 |
#if 0 |
#if 0 |
| 719 |
/* ========================================================================= */ |
/* ========================================================================= */ |
| 824 |
case OP_SOM: |
case OP_SOM: |
| 825 |
case OP_THEN: |
case OP_THEN: |
| 826 |
case OP_THEN_ARG: |
case OP_THEN_ARG: |
| 827 |
#if defined SUPPORT_UTF8 || !defined COMPILE_PCRE8 |
#if defined SUPPORT_UTF || !defined COMPILE_PCRE8 |
| 828 |
case OP_XCLASS: |
case OP_XCLASS: |
| 829 |
#endif |
#endif |
| 830 |
return SSB_FAIL; |
return SSB_FAIL; |
| 852 |
case OP_ONCE: |
case OP_ONCE: |
| 853 |
case OP_ONCE_NC: |
case OP_ONCE_NC: |
| 854 |
case OP_ASSERT: |
case OP_ASSERT: |
| 855 |
rc = set_start_bits(tcode, start_bits, utf8, cd); |
rc = set_start_bits(tcode, start_bits, utf, cd); |
| 856 |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
| 857 |
if (rc == SSB_DONE) try_next = FALSE; else |
if (rc == SSB_DONE) try_next = FALSE; else |
| 858 |
{ |
{ |
| 899 |
case OP_BRAZERO: |
case OP_BRAZERO: |
| 900 |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
| 901 |
case OP_BRAPOSZERO: |
case OP_BRAPOSZERO: |
| 902 |
rc = set_start_bits(++tcode, start_bits, utf8, cd); |
rc = set_start_bits(++tcode, start_bits, utf, cd); |
| 903 |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc; |
| 904 |
/* ========================================================================= |
/* ========================================================================= |
| 905 |
See the comment at the head of this function concerning the next line, |
See the comment at the head of this function concerning the next line, |
| 926 |
case OP_QUERY: |
case OP_QUERY: |
| 927 |
case OP_MINQUERY: |
case OP_MINQUERY: |
| 928 |
case OP_POSQUERY: |
case OP_POSQUERY: |
| 929 |
tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8); |
tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); |
| 930 |
break; |
break; |
| 931 |
|
|
| 932 |
case OP_STARI: |
case OP_STARI: |
| 935 |
case OP_QUERYI: |
case OP_QUERYI: |
| 936 |
case OP_MINQUERYI: |
case OP_MINQUERYI: |
| 937 |
case OP_POSQUERYI: |
case OP_POSQUERYI: |
| 938 |
tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8); |
tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); |
| 939 |
break; |
break; |
| 940 |
|
|
| 941 |
/* Single-char upto sets the bit and tries the next */ |
/* Single-char upto sets the bit and tries the next */ |
| 943 |
case OP_UPTO: |
case OP_UPTO: |
| 944 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 945 |
case OP_POSUPTO: |
case OP_POSUPTO: |
| 946 |
tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf8); |
tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf); |
| 947 |
break; |
break; |
| 948 |
|
|
| 949 |
case OP_UPTOI: |
case OP_UPTOI: |
| 950 |
case OP_MINUPTOI: |
case OP_MINUPTOI: |
| 951 |
case OP_POSUPTOI: |
case OP_POSUPTOI: |
| 952 |
tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf8); |
tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf); |
| 953 |
break; |
break; |
| 954 |
|
|
| 955 |
/* At least one single char sets the bit and stops */ |
/* At least one single char sets the bit and stops */ |
| 961 |
case OP_PLUS: |
case OP_PLUS: |
| 962 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 963 |
case OP_POSPLUS: |
case OP_POSPLUS: |
| 964 |
(void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf8); |
(void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf); |
| 965 |
try_next = FALSE; |
try_next = FALSE; |
| 966 |
break; |
break; |
| 967 |
|
|
| 972 |
case OP_PLUSI: |
case OP_PLUSI: |
| 973 |
case OP_MINPLUSI: |
case OP_MINPLUSI: |
| 974 |
case OP_POSPLUSI: |
case OP_POSPLUSI: |
| 975 |
(void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf8); |
(void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf); |
| 976 |
try_next = FALSE; |
try_next = FALSE; |
| 977 |
break; |
break; |
| 978 |
|
|
| 985 |
case OP_HSPACE: |
case OP_HSPACE: |
| 986 |
SET_BIT(0x09); |
SET_BIT(0x09); |
| 987 |
SET_BIT(0x20); |
SET_BIT(0x20); |
| 988 |
if (utf8) |
#ifdef SUPPORT_UTF |
| 989 |
|
if (utf) |
| 990 |
{ |
{ |
| 991 |
|
#ifdef COMPILE_PCRE8 |
| 992 |
SET_BIT(0xC2); /* For U+00A0 */ |
SET_BIT(0xC2); /* For U+00A0 */ |
| 993 |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
| 994 |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
| 995 |
SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
| 996 |
|
#endif |
| 997 |
|
#ifdef COMPILE_PCRE16 |
| 998 |
|
SET_BIT(0xA0); |
| 999 |
|
SET_BIT(0xFF); /* For characters > 255 */ |
| 1000 |
|
#endif |
| 1001 |
} |
} |
| 1002 |
else SET_BIT(0xA0); |
else |
| 1003 |
|
#endif /* SUPPORT_UTF */ |
| 1004 |
|
SET_BIT(0xA0); |
| 1005 |
try_next = FALSE; |
try_next = FALSE; |
| 1006 |
break; |
break; |
| 1007 |
|
|
| 1011 |
SET_BIT(0x0B); |
SET_BIT(0x0B); |
| 1012 |
SET_BIT(0x0C); |
SET_BIT(0x0C); |
| 1013 |
SET_BIT(0x0D); |
SET_BIT(0x0D); |
| 1014 |
if (utf8) |
#ifdef SUPPORT_UTF |
| 1015 |
|
if (utf) |
| 1016 |
{ |
{ |
| 1017 |
|
#ifdef COMPILE_PCRE8 |
| 1018 |
SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xC2); /* For U+0085 */ |
| 1019 |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
| 1020 |
|
#endif |
| 1021 |
|
#ifdef COMPILE_PCRE16 |
| 1022 |
|
SET_BIT(0x85); |
| 1023 |
|
SET_BIT(0xFF); /* For characters > 255 */ |
| 1024 |
|
#endif |
| 1025 |
} |
} |
| 1026 |
else SET_BIT(0x85); |
else |
| 1027 |
|
#endif /* SUPPORT_UTF */ |
| 1028 |
|
SET_BIT(0x85); |
| 1029 |
try_next = FALSE; |
try_next = FALSE; |
| 1030 |
break; |
break; |
| 1031 |
|
|
| 1110 |
case OP_HSPACE: |
case OP_HSPACE: |
| 1111 |
SET_BIT(0x09); |
SET_BIT(0x09); |
| 1112 |
SET_BIT(0x20); |
SET_BIT(0x20); |
| 1113 |
if (utf8) |
#ifdef SUPPORT_UTF |
| 1114 |
|
if (utf) |
| 1115 |
{ |
{ |
| 1116 |
|
#ifdef COMPILE_PCRE8 |
| 1117 |
SET_BIT(0xC2); /* For U+00A0 */ |
SET_BIT(0xC2); /* For U+00A0 */ |
| 1118 |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
| 1119 |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
| 1120 |
SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
| 1121 |
|
#endif |
| 1122 |
|
#ifdef COMPILE_PCRE16 |
| 1123 |
|
SET_BIT(0xA0); |
| 1124 |
|
SET_BIT(0xFF); /* For characters > 255 */ |
| 1125 |
|
#endif |
| 1126 |
} |
} |
| 1127 |
else SET_BIT(0xA0); |
else |
| 1128 |
|
#endif /* SUPPORT_UTF */ |
| 1129 |
|
SET_BIT(0xA0); |
| 1130 |
break; |
break; |
| 1131 |
|
|
| 1132 |
case OP_ANYNL: |
case OP_ANYNL: |
| 1135 |
SET_BIT(0x0B); |
SET_BIT(0x0B); |
| 1136 |
SET_BIT(0x0C); |
SET_BIT(0x0C); |
| 1137 |
SET_BIT(0x0D); |
SET_BIT(0x0D); |
| 1138 |
if (utf8) |
#ifdef SUPPORT_UTF |
| 1139 |
|
if (utf) |
| 1140 |
{ |
{ |
| 1141 |
|
#ifdef COMPILE_PCRE8 |
| 1142 |
SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xC2); /* For U+0085 */ |
| 1143 |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
| 1144 |
|
#endif |
| 1145 |
|
#ifdef COMPILE_PCRE16 |
| 1146 |
|
SET_BIT(0x85); |
| 1147 |
|
SET_BIT(0xFF); /* For characters > 255 */ |
| 1148 |
|
#endif |
| 1149 |
} |
} |
| 1150 |
else SET_BIT(0x85); |
else |
| 1151 |
|
#endif /* SUPPORT_UTF */ |
| 1152 |
|
SET_BIT(0x85); |
| 1153 |
break; |
break; |
| 1154 |
|
|
| 1155 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 1196 |
character with a value > 255. */ |
character with a value > 255. */ |
| 1197 |
|
|
| 1198 |
case OP_NCLASS: |
case OP_NCLASS: |
| 1199 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| 1200 |
if (utf8) |
if (utf) |
| 1201 |
{ |
{ |
| 1202 |
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc7 */ |
start_bits[24] |= 0xf0; /* Bits for 0xc4 - 0xc7 */ |
| 1203 |
memset(start_bits+25, 0xff, 7); /* Bits for 0xc8 - 0xff */ |
memset(start_bits+25, 0xff, 7); /* Bits for 0xc8 - 0xff */ |
| 1204 |
} |
} |
| 1205 |
#endif |
#endif |
| 1206 |
|
#ifdef COMPILE_PCRE16 |
| 1207 |
|
SET_BIT(0xFF); /* For characters > 255 */ |
| 1208 |
|
#endif |
| 1209 |
/* Fall through */ |
/* Fall through */ |
| 1210 |
|
|
| 1211 |
case OP_CLASS: |
case OP_CLASS: |
| 1220 |
value is > 127. In fact, there are only two possible starting bytes for |
value is > 127. In fact, there are only two possible starting bytes for |
| 1221 |
characters in the range 128 - 255. */ |
characters in the range 128 - 255. */ |
| 1222 |
|
|
| 1223 |
#ifdef SUPPORT_UTF8 |
#if defined SUPPORT_UTF && defined COMPILE_PCRE8 |
| 1224 |
if (utf8) |
if (utf) |
| 1225 |
{ |
{ |
| 1226 |
for (c = 0; c < 16; c++) start_bits[c] |= map[c]; |
for (c = 0; c < 16; c++) start_bits[c] |= map[c]; |
| 1227 |
for (c = 128; c < 256; c++) |
for (c = 128; c < 256; c++) |
| 1234 |
} |
} |
| 1235 |
} |
} |
| 1236 |
} |
} |
|
|
|
|
/* In non-UTF-8 mode, the two bit maps are completely compatible. */ |
|
|
|
|
| 1237 |
else |
else |
| 1238 |
#endif |
#endif |
| 1239 |
{ |
{ |
| 1240 |
|
/* In non-UTF-8 mode, the two bit maps are completely compatible. */ |
| 1241 |
for (c = 0; c < 32; c++) start_bits[c] |= map[c]; |
for (c = 0; c < 32; c++) start_bits[c] |= map[c]; |
| 1242 |
} |
} |
| 1243 |
|
|
| 1325 |
return NULL; |
return NULL; |
| 1326 |
} |
} |
| 1327 |
|
|
| 1328 |
|
if ((re->flags & PCRE_MODE) == 0) |
| 1329 |
|
{ |
| 1330 |
|
#ifdef COMPILE_PCRE8 |
| 1331 |
|
*errorptr = "argument is compiled in 16 bit mode"; |
| 1332 |
|
#else |
| 1333 |
|
*errorptr = "argument is compiled in 8 bit mode"; |
| 1334 |
|
#endif |
| 1335 |
|
return NULL; |
| 1336 |
|
} |
| 1337 |
|
|
| 1338 |
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) |
if ((options & ~PUBLIC_STUDY_OPTIONS) != 0) |
| 1339 |
{ |
{ |
| 1340 |
*errorptr = "unknown or incorrect option bit(s) set"; |
*errorptr = "unknown or incorrect option bit(s) set"; |
| 1356 |
/* Set the character tables in the block that is passed around */ |
/* Set the character tables in the block that is passed around */ |
| 1357 |
|
|
| 1358 |
tables = re->tables; |
tables = re->tables; |
| 1359 |
|
|
| 1360 |
|
#ifdef COMPILE_PCRE8 |
| 1361 |
if (tables == NULL) |
if (tables == NULL) |
| 1362 |
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
(void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
| 1363 |
(void *)(&tables)); |
(void *)(&tables)); |
| 1364 |
|
#else |
| 1365 |
|
if (tables == NULL) |
| 1366 |
|
(void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES, |
| 1367 |
|
(void *)(&tables)); |
| 1368 |
|
#endif |
| 1369 |
|
|
| 1370 |
compile_block.lcc = tables + lcc_offset; |
compile_block.lcc = tables + lcc_offset; |
| 1371 |
compile_block.fcc = tables + fcc_offset; |
compile_block.fcc = tables + fcc_offset; |
| 1430 |
memcpy(study->start_bits, start_bits, sizeof(start_bits)); |
memcpy(study->start_bits, start_bits, sizeof(start_bits)); |
| 1431 |
} |
} |
| 1432 |
|
|
| 1433 |
|
#ifdef PCRE_DEBUG |
| 1434 |
|
if (bits_set) |
| 1435 |
|
{ |
| 1436 |
|
pcre_uint8 *ptr = (pcre_uint8 *)start_bits; |
| 1437 |
|
int i; |
| 1438 |
|
|
| 1439 |
|
printf("Start bits:\n"); |
| 1440 |
|
for (i = 0; i < 32; i++) |
| 1441 |
|
printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n"); |
| 1442 |
|
} |
| 1443 |
|
#endif |
| 1444 |
|
|
| 1445 |
/* Always set the minlength value in the block, because the JIT compiler |
/* Always set the minlength value in the block, because the JIT compiler |
| 1446 |
makes use of it. However, don't set the bit unless the length is greater than |
makes use of it. However, don't set the bit unless the length is greater than |
| 1447 |
zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time |
zero - the interpretive pcre_exec() and pcre_dfa_exec() needn't waste time |