| 504 |
start_bits the starting bitmap |
start_bits the starting bitmap |
| 505 |
cbit_type the type of character wanted |
cbit_type the type of character wanted |
| 506 |
table_limit 32 for non-UTF-8; 16 for UTF-8 |
table_limit 32 for non-UTF-8; 16 for UTF-8 |
| 507 |
cd the block with char table pointers |
cd the block with char table pointers |
| 508 |
|
|
| 509 |
Returns: nothing |
Returns: nothing |
| 510 |
*/ |
*/ |
/* set_type_bits: merge the bitmap of a positive character type into
start_bits, then, when table_limit is not 32, process code points 128..255
individually.
NOTE(review): this text is a side-by-side listing - each source line appears
twice after its "| NNN |" row marker, and rows 511-512 and 520-521 are absent.
The return type and the statement that opens the third brace level closed
below are therefore not shown here; confirm against the real file before
changing any code. */
| 513 |
set_type_bits(uschar *start_bits, int cbit_type, int table_limit, |
set_type_bits(uschar *start_bits, int cbit_type, int table_limit, |
| 514 |
compile_data *cd) |
compile_data *cd) |
| 515 |
{ |
{ |
| 516 |
register int c; |
register int c; |
/* OR the first table_limit bytes of the selected class bitmap (found at
offset cbit_type within cd->cbits) into the caller's bitmap. */
| 517 |
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; |
for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type]; |
/* A limit of 32 is the non-UTF-8 case according to the argument description;
nothing more is done for it. */
| 518 |
if (table_limit == 32) return; |
if (table_limit == 32) return; |
/* Otherwise walk the values 128..255 one at a time. */
| 519 |
for (c = 128; c < 256; c++) |
for (c = 128; c < 256; c++) |
| 522 |
{ |
{ |
| 523 |
uschar buff[8]; |
uschar buff[8]; |
/* Presumably encodes c as UTF-8 into buff (return value deliberately
discarded) - verify against the definition of _pcre_ord2utf8. */
| 524 |
(void)_pcre_ord2utf8(c, buff); |
(void)_pcre_ord2utf8(c, buff); |
/* Record the first byte written to buff; SET_BIT is a macro defined
elsewhere, presumably setting that bit in start_bits - TODO confirm. */
| 525 |
SET_BIT(buff[0]); |
SET_BIT(buff[0]); |
| 526 |
} |
} |
| 527 |
} |
} |
| 528 |
} |
} |
| 529 |
|
|
| 530 |
|
|
| 535 |
/* This function sets starting bits for a negative character type such as \D. |
/* This function sets starting bits for a negative character type such as \D. |
| 536 |
In UTF-8 mode, we can only do a direct setting for bytes less than 128, as |
In UTF-8 mode, we can only do a direct setting for bytes less than 128, as |
| 537 |
otherwise there can be confusion with bytes in the middle of UTF-8 characters. |
otherwise there can be confusion with bytes in the middle of UTF-8 characters. |
| 538 |
Unlike in the positive case, where we can set appropriate starting bits for |
Unlike in the positive case, where we can set appropriate starting bits for |
| 539 |
specific high-valued UTF-8 characters, in this case we have to set the bits for |
specific high-valued UTF-8 characters, in this case we have to set the bits for |
| 540 |
all high-valued characters. The lowest is 0xc2, but we overkill by starting at |
all high-valued characters. The lowest is 0xc2, but we overkill by starting at |
| 541 |
0xc0 (192) for simplicity. |
0xc0 (192) for simplicity. |
| 542 |
|
|
| 543 |
Arguments: |
Arguments: |
| 544 |
start_bits the starting bitmap |
start_bits the starting bitmap |
| 545 |
cbit_type the type of character wanted |
cbit_type the type of character wanted |
| 546 |
table_limit 32 for non-UTF-8; 16 for UTF-8 |
table_limit 32 for non-UTF-8; 16 for UTF-8 |
| 547 |
cd the block with char table pointers |
cd the block with char table pointers |
| 548 |
|
|
| 549 |
Returns: nothing |
Returns: nothing |
| 550 |
*/ |
*/ |
/* set_nottype_bits: merge the complement of a character-type bitmap into
start_bits, and when table_limit is not 32 mark every byte value from 192
upwards as a possible starting byte.
NOTE(review): this text is a side-by-side listing - each source line appears
twice after its "| NNN |" row marker, and rows 551-552 (where the return type
would be) are absent; confirm against the real file before changing code. */
| 553 |
set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit, |
set_nottype_bits(uschar *start_bits, int cbit_type, int table_limit, |
| 554 |
compile_data *cd) |
compile_data *cd) |
| 555 |
{ |
{ |
| 556 |
register int c; |
register int c; |
/* OR the bitwise complement of each class byte (offset cbit_type within
cd->cbits) into the caller's bitmap. The ~ is evaluated after integer
promotion and the result is narrowed again on the store - assumes uschar is
an unsigned 8-bit type, TODO confirm. */
| 557 |
for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; |
for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type]; |
/* When the limit is not 32 (the UTF-8 case according to the argument
description), force bitmap bytes 24..31 to all ones. With 8 bits per byte
that covers byte values 192 (0xc0) through 255, matching the header
comment's "starting at 0xc0". */
| 558 |
if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; |
if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff; |
| 559 |
} |
} |
| 750 |
SET_BIT(0x20); |
SET_BIT(0x20); |
| 751 |
if (utf8) |
if (utf8) |
| 752 |
{ |
{ |
| 753 |
SET_BIT(0xC2); /* For U+00A0 */ |
SET_BIT(0xC2); /* For U+00A0 */ |
| 754 |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
| 755 |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
| 756 |
SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
| 765 |
SET_BIT(0x0B); |
SET_BIT(0x0B); |
| 766 |
SET_BIT(0x0C); |
SET_BIT(0x0C); |
| 767 |
SET_BIT(0x0D); |
SET_BIT(0x0D); |
| 768 |
if (utf8) |
if (utf8) |
| 769 |
{ |
{ |
| 770 |
SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xC2); /* For U+0085 */ |
| 771 |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
| 772 |
} |
} |
| 773 |
else SET_BIT(0x85); |
else SET_BIT(0x85); |
| 774 |
try_next = FALSE; |
try_next = FALSE; |
| 775 |
break; |
break; |
| 776 |
|
|
| 777 |
/* Single character types set the bits and stop. Note that if PCRE_UCP |
/* Single character types set the bits and stop. Note that if PCRE_UCP |
| 778 |
is set, we do not see these op codes because \d etc are converted to |
is set, we do not see these op codes because \d etc are converted to |
| 779 |
properties. Therefore, these apply in the case when only characters less |
properties. Therefore, these apply in the case when only characters less |
| 780 |
than 256 are recognized to match the types. */ |
than 256 are recognized to match the types. */ |
| 781 |
|
|
| 782 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 817 |
set_type_bits(start_bits, cbit_word, table_limit, cd); |
set_type_bits(start_bits, cbit_word, table_limit, cd); |
| 818 |
try_next = FALSE; |
try_next = FALSE; |
| 819 |
break; |
break; |
| 820 |
|
|
| 821 |
/* One or more character type fudges the pointer and restarts, knowing |
/* One or more character type fudges the pointer and restarts, knowing |
| 822 |
it will hit a single character type and stop there. */ |
it will hit a single character type and stop there. */ |
| 823 |
|
|
| 857 |
SET_BIT(0x20); |
SET_BIT(0x20); |
| 858 |
if (utf8) |
if (utf8) |
| 859 |
{ |
{ |
| 860 |
SET_BIT(0xC2); /* For U+00A0 */ |
SET_BIT(0xC2); /* For U+00A0 */ |
| 861 |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
| 862 |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
| 863 |
SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
| 871 |
SET_BIT(0x0B); |
SET_BIT(0x0B); |
| 872 |
SET_BIT(0x0C); |
SET_BIT(0x0C); |
| 873 |
SET_BIT(0x0D); |
SET_BIT(0x0D); |
| 874 |
if (utf8) |
if (utf8) |
| 875 |
{ |
{ |
| 876 |
SET_BIT(0xC2); /* For U+0085 */ |
SET_BIT(0xC2); /* For U+0085 */ |
| 877 |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
| 878 |
} |
} |
| 879 |
else SET_BIT(0x85); |
else SET_BIT(0x85); |
| 880 |
break; |
break; |
| 881 |
|
|
| 892 |
|
|
| 893 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 894 |
set_nottype_bits(start_bits, cbit_space, table_limit, cd); |
set_nottype_bits(start_bits, cbit_space, table_limit, cd); |
| 895 |
start_bits[1] |= 0x08; |
start_bits[1] |= 0x08; |
| 896 |
break; |
break; |
| 897 |
|
|
| 898 |
/* The cbit_space table has vertical tab as whitespace; we have to |
/* The cbit_space table has vertical tab as whitespace; we have to |