| 519 |
{ |
{ |
| 520 |
register int c; |
register int c; |
| 521 |
int yield = SSB_DONE; |
int yield = SSB_DONE; |
| 522 |
|
int table_limit = utf8? 16:32; |
| 523 |
|
|
| 524 |
#if 0 |
#if 0 |
| 525 |
/* ========================================================================= */ |
/* ========================================================================= */ |
| 677 |
case OP_HSPACE: |
case OP_HSPACE: |
| 678 |
SET_BIT(0x09); |
SET_BIT(0x09); |
| 679 |
SET_BIT(0x20); |
SET_BIT(0x20); |
|
SET_BIT(0xA0); |
|
| 680 |
if (utf8) |
if (utf8) |
| 681 |
{ |
{ |
| 682 |
|
SET_BIT(0xC2); /* For U+00A0 */ |
| 683 |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
| 684 |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
| 685 |
SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
| 686 |
} |
} |
| 687 |
|
else SET_BIT(0xA0); |
| 688 |
try_next = FALSE; |
try_next = FALSE; |
| 689 |
break; |
break; |
| 690 |
|
|
| 694 |
SET_BIT(0x0B); |
SET_BIT(0x0B); |
| 695 |
SET_BIT(0x0C); |
SET_BIT(0x0C); |
| 696 |
SET_BIT(0x0D); |
SET_BIT(0x0D); |
| 697 |
SET_BIT(0x85); |
if (utf8) |
| 698 |
if (utf8) SET_BIT(0xE2); /* For U+2028, U+2029 */ |
{ |
| 699 |
|
SET_BIT(0xC2); /* For U+0085 */ |
| 700 |
|
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
| 701 |
|
} |
| 702 |
|
else SET_BIT(0x85); |
| 703 |
try_next = FALSE; |
try_next = FALSE; |
| 704 |
break; |
break; |
| 705 |
|
|
| 706 |
/* Single character types set the bits and stop. Note that if PCRE_UCP |
/* Single character types set the bits and stop. Note that if PCRE_UCP |
| 707 |
is set, we do not see these op codes because \d etc are converted to |
is set, we do not see these op codes because \d etc are converted to |
| 708 |
properties. Therefore, these apply in the case when only ASCII characters |
properties. Therefore, these apply in the case when only ASCII characters |
| 709 |
are recognized to match the types. */ |
are recognized to match the types. In UTF-8 mode, we must restrict |
| 710 |
|
ourselves to bytes less than 128, as otherwise there can be confusion |
| 711 |
|
with bytes in the middle of UTF-8 characters. (In a "traditional" |
| 712 |
|
environment, the tables will only recognize ASCII characters anyway, but |
| 713 |
|
in at least one Windows environment, bits for some higher bytes were set in |
| 714 |
|
the tables.) */ |
| 715 |
|
|
| 716 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 717 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 718 |
start_bits[c] |= ~cd->cbits[c+cbit_digit]; |
start_bits[c] |= ~cd->cbits[c+cbit_digit]; |
| 719 |
try_next = FALSE; |
try_next = FALSE; |
| 720 |
break; |
break; |
| 721 |
|
|
| 722 |
case OP_DIGIT: |
case OP_DIGIT: |
| 723 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 724 |
start_bits[c] |= cd->cbits[c+cbit_digit]; |
start_bits[c] |= cd->cbits[c+cbit_digit]; |
| 725 |
try_next = FALSE; |
try_next = FALSE; |
| 726 |
break; |
break; |
| 729 |
discard it. */ |
discard it. */ |
| 730 |
|
|
| 731 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 732 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 733 |
{ |
{ |
| 734 |
int d = cd->cbits[c+cbit_space]; |
int d = cd->cbits[c+cbit_space]; |
| 735 |
if (c == 1) d &= ~0x08; |
if (c == 1) d &= ~0x08; |
| 742 |
discard it. */ |
discard it. */ |
| 743 |
|
|
| 744 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
| 745 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 746 |
{ |
{ |
| 747 |
int d = cd->cbits[c+cbit_space]; |
int d = cd->cbits[c+cbit_space]; |
| 748 |
if (c == 1) d &= ~0x08; |
if (c == 1) d &= ~0x08; |
| 752 |
break; |
break; |
| 753 |
|
|
| 754 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 755 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 756 |
start_bits[c] |= ~cd->cbits[c+cbit_word]; |
start_bits[c] |= ~cd->cbits[c+cbit_word]; |
| 757 |
try_next = FALSE; |
try_next = FALSE; |
| 758 |
break; |
break; |
| 759 |
|
|
| 760 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 761 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 762 |
start_bits[c] |= cd->cbits[c+cbit_word]; |
start_bits[c] |= cd->cbits[c+cbit_word]; |
| 763 |
try_next = FALSE; |
try_next = FALSE; |
| 764 |
break; |
break; |
| 800 |
case OP_HSPACE: |
case OP_HSPACE: |
| 801 |
SET_BIT(0x09); |
SET_BIT(0x09); |
| 802 |
SET_BIT(0x20); |
SET_BIT(0x20); |
|
SET_BIT(0xA0); |
|
| 803 |
if (utf8) |
if (utf8) |
| 804 |
{ |
{ |
| 805 |
|
SET_BIT(0xC2); /* For U+00A0 */ |
| 806 |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
SET_BIT(0xE1); /* For U+1680, U+180E */ |
| 807 |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
SET_BIT(0xE2); /* For U+2000 - U+200A, U+202F, U+205F */ |
| 808 |
SET_BIT(0xE3); /* For U+3000 */ |
SET_BIT(0xE3); /* For U+3000 */ |
| 809 |
} |
} |
| 810 |
|
else SET_BIT(0xA0); |
| 811 |
break; |
break; |
| 812 |
|
|
| 813 |
case OP_ANYNL: |
case OP_ANYNL: |
| 816 |
SET_BIT(0x0B); |
SET_BIT(0x0B); |
| 817 |
SET_BIT(0x0C); |
SET_BIT(0x0C); |
| 818 |
SET_BIT(0x0D); |
SET_BIT(0x0D); |
| 819 |
SET_BIT(0x85); |
if (utf8) |
| 820 |
if (utf8) SET_BIT(0xE2); /* For U+2028, U+2029 */ |
{ |
| 821 |
|
SET_BIT(0xC2); /* For U+0085 */ |
| 822 |
|
SET_BIT(0xE2); /* For U+2028, U+2029 */ |
| 823 |
|
} |
| 824 |
|
else SET_BIT(0x85); |
| 825 |
break; |
break; |
| 826 |
|
|
| 827 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 828 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 829 |
start_bits[c] |= ~cd->cbits[c+cbit_digit]; |
start_bits[c] |= ~cd->cbits[c+cbit_digit]; |
| 830 |
break; |
break; |
| 831 |
|
|
| 832 |
case OP_DIGIT: |
case OP_DIGIT: |
| 833 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 834 |
start_bits[c] |= cd->cbits[c+cbit_digit]; |
start_bits[c] |= cd->cbits[c+cbit_digit]; |
| 835 |
break; |
break; |
| 836 |
|
|
| 838 |
discard it. */ |
discard it. */ |
| 839 |
|
|
| 840 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 841 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 842 |
{ |
{ |
| 843 |
int d = cd->cbits[c+cbit_space]; |
int d = cd->cbits[c+cbit_space]; |
| 844 |
if (c == 1) d &= ~0x08; |
if (c == 1) d &= ~0x08; |
| 850 |
discard it. */ |
discard it. */ |
| 851 |
|
|
| 852 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
| 853 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 854 |
{ |
{ |
| 855 |
int d = cd->cbits[c+cbit_space]; |
int d = cd->cbits[c+cbit_space]; |
| 856 |
if (c == 1) d &= ~0x08; |
if (c == 1) d &= ~0x08; |
| 859 |
break; |
break; |
| 860 |
|
|
| 861 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 862 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 863 |
start_bits[c] |= ~cd->cbits[c+cbit_word]; |
start_bits[c] |= ~cd->cbits[c+cbit_word]; |
| 864 |
break; |
break; |
| 865 |
|
|
| 866 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 867 |
for (c = 0; c < 32; c++) |
for (c = 0; c < table_limit; c++) |
| 868 |
start_bits[c] |= cd->cbits[c+cbit_word]; |
start_bits[c] |= cd->cbits[c+cbit_word]; |
| 869 |
break; |
break; |
| 870 |
} |
} |