| 54 |
static char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
static char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
| 55 |
static char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
static char rep_max[] = { 0, 0, 0, 0, 1, 1 }; |
| 56 |
|
|
| 57 |
/* Text forms of OP_ values and things, for debugging */ |
/* Text forms of OP_ values and things, for debugging (not all used) */ |
| 58 |
|
|
| 59 |
#ifdef DEBUG |
#ifdef DEBUG |
| 60 |
static const char *OP_names[] = { |
static const char *OP_names[] = { |
| 65 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 66 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 67 |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
| 68 |
"class", "Ref", |
"class", "negclass", "Ref", |
| 69 |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once", |
| 70 |
"Brazero", "Braminzero", "Bra" |
"Brazero", "Braminzero", "Bra" |
| 71 |
}; |
}; |
| 91 |
|
|
| 92 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
| 93 |
|
|
| 94 |
static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **); |
static BOOL |
| 95 |
|
compile_regex(int, int *, uschar **, const uschar **, const char **); |
| 96 |
|
|
| 97 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
| 98 |
doing the matching, so that they are thread-safe. */ |
doing the matching, so that they are thread-safe. */ |
| 307 |
/* Check a class or a back reference for a zero minimum */ |
/* Check a class or a back reference for a zero minimum */ |
| 308 |
|
|
| 309 |
case OP_CLASS: |
case OP_CLASS: |
| 310 |
|
case OP_NEGCLASS: |
| 311 |
case OP_REF: |
case OP_REF: |
| 312 |
cc += (*cc == OP_REF)? 2 : 33; |
cc += (*cc == OP_REF)? 2 : 33; |
| 313 |
|
|
| 672 |
|
|
| 673 |
case '[': |
case '[': |
| 674 |
previous = code; |
previous = code; |
|
*code++ = OP_CLASS; |
|
| 675 |
|
|
| 676 |
/* If the first character is '^', set the negation flag */ |
/* If the first character is '^', set the negation flag, and use a |
| 677 |
|
different opcode. This only matters if caseless matching is specified at |
| 678 |
|
runtime. */ |
| 679 |
|
|
| 680 |
if ((c = *(++ptr)) == '^') |
if ((c = *(++ptr)) == '^') |
| 681 |
{ |
{ |
| 682 |
negate_class = TRUE; |
negate_class = TRUE; |
| 683 |
|
*code++ = OP_NEGCLASS; |
| 684 |
c = *(++ptr); |
c = *(++ptr); |
| 685 |
} |
} |
| 686 |
else negate_class = FALSE; |
else |
| 687 |
|
{ |
| 688 |
|
negate_class = FALSE; |
| 689 |
|
*code++ = OP_CLASS; |
| 690 |
|
} |
| 691 |
|
|
| 692 |
/* Keep a count of chars so that we can optimize the case of just a single |
/* Keep a count of chars so that we can optimize the case of just a single |
| 693 |
character. */ |
character. */ |
| 1023 |
/* If previous was a character class or a back reference, we put the repeat |
/* If previous was a character class or a back reference, we put the repeat |
| 1024 |
stuff after it. */ |
stuff after it. */ |
| 1025 |
|
|
| 1026 |
else if (*previous == OP_CLASS || *previous == OP_REF) |
else if (*previous == OP_CLASS || *previous == OP_NEGCLASS || |
| 1027 |
|
*previous == OP_REF) |
| 1028 |
{ |
{ |
| 1029 |
if (repeat_min == 0 && repeat_max == -1) |
if (repeat_min == 0 && repeat_max == -1) |
| 1030 |
*code++ = OP_CRSTAR + repeat_type; |
*code++ = OP_CRSTAR + repeat_type; |
| 2099 |
goto CLASS_REF_REPEAT; |
goto CLASS_REF_REPEAT; |
| 2100 |
|
|
| 2101 |
case OP_CLASS: |
case OP_CLASS: |
| 2102 |
|
case OP_NEGCLASS: |
| 2103 |
{ |
{ |
| 2104 |
int i, min, max; |
int i, min, max; |
| 2105 |
|
|
| 2106 |
code++; |
if (*code++ == OP_CLASS) printf(" ["); |
| 2107 |
printf(" ["); |
else printf(" ^["); |
| 2108 |
|
|
| 2109 |
for (i = 0; i < 256; i++) |
for (i = 0; i < 256; i++) |
| 2110 |
{ |
{ |
| 2724 |
item to see if there is repeat information following. Then obey similar |
item to see if there is repeat information following. Then obey similar |
| 2725 |
code to character type repeats - written out again for speed. If caseless |
code to character type repeats - written out again for speed. If caseless |
| 2726 |
matching was set at runtime but not at compile time, we have to check both |
matching was set at runtime but not at compile time, we have to check both |
| 2727 |
versions of a character. */ |
versions of a character, and we have to behave differently for positive and |
| 2728 |
|
negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are |
| 2729 |
|
treated differently. */ |
| 2730 |
|
|
| 2731 |
case OP_CLASS: |
case OP_CLASS: |
| 2732 |
|
case OP_NEGCLASS: |
| 2733 |
{ |
{ |
| 2734 |
|
BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless; |
| 2735 |
const uschar *data = ecode + 1; /* Save for matching */ |
const uschar *data = ecode + 1; /* Save for matching */ |
| 2736 |
ecode += 33; /* Advance past the item */ |
ecode += 33; /* Advance past the item */ |
| 2737 |
|
|
| 2760 |
break; |
break; |
| 2761 |
|
|
| 2762 |
default: /* No repeat follows */ |
default: /* No repeat follows */ |
| 2763 |
if (eptr >= md->end_subject) return FALSE; |
min = max = 1; |
| 2764 |
c = *eptr++; |
break; |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
if (md->runtime_caseless) |
|
|
{ |
|
|
c = pcre_fcc[c]; |
|
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; /* With main loop */ |
|
|
} |
|
|
return FALSE; |
|
| 2765 |
} |
} |
| 2766 |
|
|
| 2767 |
/* First, ensure the minimum number of matches are present. */ |
/* First, ensure the minimum number of matches are present. */ |
| 2770 |
{ |
{ |
| 2771 |
if (eptr >= md->end_subject) return FALSE; |
if (eptr >= md->end_subject) return FALSE; |
| 2772 |
c = *eptr++; |
c = *eptr++; |
| 2773 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
| 2774 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
| 2775 |
|
runtime caseless, continue if either case is in the map. */ |
| 2776 |
|
|
| 2777 |
|
if (!nasty_case) |
| 2778 |
{ |
{ |
| 2779 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2780 |
|
if (md->runtime_caseless) |
| 2781 |
|
{ |
| 2782 |
|
c = pcre_fcc[c]; |
| 2783 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2784 |
|
} |
| 2785 |
|
} |
| 2786 |
|
|
| 2787 |
|
/* Runtime caseless and it was a negative class. Continue only if |
| 2788 |
|
both cases are in the map. */ |
| 2789 |
|
|
| 2790 |
|
else |
| 2791 |
|
{ |
| 2792 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
| 2793 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
| 2794 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2795 |
} |
} |
| 2796 |
|
|
| 2797 |
return FALSE; |
return FALSE; |
| 2798 |
} |
} |
| 2799 |
|
|
| 2812 |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
if (match(eptr, ecode, offset_top, md)) return TRUE; |
| 2813 |
if (i >= max || eptr >= md->end_subject) return FALSE; |
if (i >= max || eptr >= md->end_subject) return FALSE; |
| 2814 |
c = *eptr++; |
c = *eptr++; |
| 2815 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
| 2816 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
| 2817 |
|
runtime caseless, continue if either case is in the map. */ |
| 2818 |
|
|
| 2819 |
|
if (!nasty_case) |
| 2820 |
|
{ |
| 2821 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2822 |
|
if (md->runtime_caseless) |
| 2823 |
|
{ |
| 2824 |
|
c = pcre_fcc[c]; |
| 2825 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2826 |
|
} |
| 2827 |
|
} |
| 2828 |
|
|
| 2829 |
|
/* Runtime caseless and it was a negative class. Continue only if |
| 2830 |
|
both cases are in the map. */ |
| 2831 |
|
|
| 2832 |
|
else |
| 2833 |
{ |
{ |
| 2834 |
|
if ((data[c/8] & (1 << (c&7))) == 0) return FALSE; |
| 2835 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
| 2836 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2837 |
} |
} |
| 2838 |
|
|
| 2839 |
return FALSE; |
return FALSE; |
| 2840 |
} |
} |
| 2841 |
/* Control never gets here */ |
/* Control never gets here */ |
| 2850 |
{ |
{ |
| 2851 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
| 2852 |
c = *eptr; |
c = *eptr; |
| 2853 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
| 2854 |
if (md->runtime_caseless) |
/* Either not runtime caseless, or it was a positive class. For |
| 2855 |
|
runtime caseless, continue if either case is in the map. */ |
| 2856 |
|
|
| 2857 |
|
if (!nasty_case) |
| 2858 |
|
{ |
| 2859 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2860 |
|
if (md->runtime_caseless) |
| 2861 |
|
{ |
| 2862 |
|
c = pcre_fcc[c]; |
| 2863 |
|
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2864 |
|
} |
| 2865 |
|
} |
| 2866 |
|
|
| 2867 |
|
/* Runtime caseless and it was a negative class. Continue only if |
| 2868 |
|
both cases are in the map. */ |
| 2869 |
|
|
| 2870 |
|
else |
| 2871 |
{ |
{ |
| 2872 |
|
if ((data[c/8] & (1 << (c&7))) == 0) break; |
| 2873 |
c = pcre_fcc[c]; |
c = pcre_fcc[c]; |
| 2874 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 2875 |
} |
} |
| 2876 |
|
|
| 2877 |
break; |
break; |
| 2878 |
} |
} |
| 2879 |
|
|