| 7 |
below for why this module is different). |
below for why this module is different). |
| 8 |
|
|
| 9 |
Written by Philip Hazel |
Written by Philip Hazel |
| 10 |
Copyright (c) 1997-2010 University of Cambridge |
Copyright (c) 1997-2011 University of Cambridge |
| 11 |
|
|
| 12 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 13 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 121 |
0, 0, /* \P, \p */ |
0, 0, /* \P, \p */ |
| 122 |
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */ |
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */ |
| 123 |
0, /* \X */ |
0, /* \X */ |
| 124 |
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ |
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */ |
| 125 |
1, /* Char */ |
1, /* Char */ |
| 126 |
1, /* Charnc */ |
1, /* Chari */ |
| 127 |
1, /* not */ |
1, /* not */ |
| 128 |
|
1, /* noti */ |
| 129 |
/* Positive single-char repeats */ |
/* Positive single-char repeats */ |
| 130 |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ |
| 131 |
3, 3, 3, /* upto, minupto, exact */ |
3, 3, 3, /* upto, minupto, exact */ |
| 132 |
1, 1, 1, 3, /* *+, ++, ?+, upto+ */ |
1, 1, 1, 3, /* *+, ++, ?+, upto+ */ |
| 133 |
|
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ |
| 134 |
|
3, 3, 3, /* upto I, minupto I, exact I */ |
| 135 |
|
1, 1, 1, 3, /* *+I, ++I, ?+I, upto+I */ |
| 136 |
/* Negative single-char repeats - only for chars < 256 */ |
/* Negative single-char repeats - only for chars < 256 */ |
| 137 |
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ |
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ |
| 138 |
3, 3, 3, /* NOT upto, minupto, exact */ |
3, 3, 3, /* NOT upto, minupto, exact */ |
| 139 |
1, 1, 1, 3, /* NOT *+, ++, ?+, updo+ */ |
1, 1, 1, 3, /* NOT *+, ++, ?+, upto+ */ |
| 140 |
|
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ |
| 141 |
|
3, 3, 3, /* NOT upto I, minupto I, exact I */ |
| 142 |
|
1, 1, 1, 3, /* NOT *+I, ++I, ?+I, upto+I */ |
| 143 |
/* Positive type repeats */ |
/* Positive type repeats */ |
| 144 |
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ |
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ |
| 145 |
3, 3, 3, /* Type upto, minupto, exact */ |
3, 3, 3, /* Type upto, minupto, exact */ |
| 151 |
0, /* NCLASS */ |
0, /* NCLASS */ |
| 152 |
0, /* XCLASS - variable length */ |
0, /* XCLASS - variable length */ |
| 153 |
0, /* REF */ |
0, /* REF */ |
| 154 |
|
0, /* REFI */ |
| 155 |
0, /* RECURSE */ |
0, /* RECURSE */ |
| 156 |
0, /* CALLOUT */ |
0, /* CALLOUT */ |
| 157 |
0, /* Alt */ |
0, /* Alt */ |
| 158 |
0, /* Ket */ |
0, /* Ket */ |
| 159 |
0, /* KetRmax */ |
0, /* KetRmax */ |
| 160 |
0, /* KetRmin */ |
0, /* KetRmin */ |
| 161 |
|
0, /* KetRpos */ |
| 162 |
0, /* Assert */ |
0, /* Assert */ |
| 163 |
0, /* Assert not */ |
0, /* Assert not */ |
| 164 |
0, /* Assert behind */ |
0, /* Assert behind */ |
| 165 |
0, /* Assert behind not */ |
0, /* Assert behind not */ |
| 166 |
0, /* Reverse */ |
0, /* Reverse */ |
| 167 |
0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */ |
0, 0, 0, 0, 0, 0, /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */ |
| 168 |
0, 0, 0, /* SBRA, SCBRA, SCOND */ |
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ |
| 169 |
0, 0, /* CREF, NCREF */ |
0, 0, /* CREF, NCREF */ |
| 170 |
0, 0, /* RREF, NRREF */ |
0, 0, /* RREF, NRREF */ |
| 171 |
0, /* DEF */ |
0, /* DEF */ |
| 172 |
0, 0, /* BRAZERO, BRAMINZERO */ |
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ |
| 173 |
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */ |
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */ |
| 174 |
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */ |
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */ |
| 175 |
0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */ |
0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */ |
| 188 |
1, 1, /* \P, \p */ |
1, 1, /* \P, \p */ |
| 189 |
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ |
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ |
| 190 |
1, /* \X */ |
1, /* \X */ |
| 191 |
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ |
0, 0, 0, 0, 0, 0, /* \Z, \z, ^, ^M, $, $M */ |
| 192 |
1, /* Char */ |
1, /* Char */ |
| 193 |
1, /* Charnc */ |
1, /* Chari */ |
| 194 |
1, /* not */ |
1, /* not */ |
| 195 |
|
1, /* noti */ |
| 196 |
/* Positive single-char repeats */ |
/* Positive single-char repeats */ |
| 197 |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ |
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ |
| 198 |
1, 1, 1, /* upto, minupto, exact */ |
1, 1, 1, /* upto, minupto, exact */ |
| 199 |
1, 1, 1, 1, /* *+, ++, ?+, upto+ */ |
1, 1, 1, 1, /* *+, ++, ?+, upto+ */ |
| 200 |
|
1, 1, 1, 1, 1, 1, /* *I, *?I, +I, +?I, ?I, ??I */ |
| 201 |
|
1, 1, 1, /* upto I, minupto I, exact I */ |
| 202 |
|
1, 1, 1, 1, /* *+I, ++I, ?+I, upto+I */ |
| 203 |
/* Negative single-char repeats - only for chars < 256 */ |
/* Negative single-char repeats - only for chars < 256 */ |
| 204 |
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ |
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ |
| 205 |
1, 1, 1, /* NOT upto, minupto, exact */ |
1, 1, 1, /* NOT upto, minupto, exact */ |
| 206 |
1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */ |
1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */ |
| 207 |
|
1, 1, 1, 1, 1, 1, /* NOT *I, *?I, +I, +?I, ?I, ??I */ |
| 208 |
|
1, 1, 1, /* NOT upto I, minupto I, exact I */ |
| 209 |
|
1, 1, 1, 1, /* NOT *+I, ++I, ?+I, upto+I */ |
| 210 |
/* Positive type repeats */ |
/* Positive type repeats */ |
| 211 |
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ |
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ |
| 212 |
1, 1, 1, /* Type upto, minupto, exact */ |
1, 1, 1, /* Type upto, minupto, exact */ |
| 218 |
1, /* NCLASS */ |
1, /* NCLASS */ |
| 219 |
1, /* XCLASS - variable length */ |
1, /* XCLASS - variable length */ |
| 220 |
0, /* REF */ |
0, /* REF */ |
| 221 |
|
0, /* REFI */ |
| 222 |
0, /* RECURSE */ |
0, /* RECURSE */ |
| 223 |
0, /* CALLOUT */ |
0, /* CALLOUT */ |
| 224 |
0, /* Alt */ |
0, /* Alt */ |
| 225 |
0, /* Ket */ |
0, /* Ket */ |
| 226 |
0, /* KetRmax */ |
0, /* KetRmax */ |
| 227 |
0, /* KetRmin */ |
0, /* KetRmin */ |
| 228 |
|
0, /* KetRpos */ |
| 229 |
0, /* Assert */ |
0, /* Assert */ |
| 230 |
0, /* Assert not */ |
0, /* Assert not */ |
| 231 |
0, /* Assert behind */ |
0, /* Assert behind */ |
| 232 |
0, /* Assert behind not */ |
0, /* Assert behind not */ |
| 233 |
0, /* Reverse */ |
0, /* Reverse */ |
| 234 |
0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */ |
0, 0, 0, 0, 0, 0, /* ONCE, BRA, BRAPOS, CBRA, CBRAPOS, COND */ |
| 235 |
0, 0, 0, /* SBRA, SCBRA, SCOND */ |
0, 0, 0, 0, 0, /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND */ |
| 236 |
0, 0, /* CREF, NCREF */ |
0, 0, /* CREF, NCREF */ |
| 237 |
0, 0, /* RREF, NRREF */ |
0, 0, /* RREF, NRREF */ |
| 238 |
0, /* DEF */ |
0, /* DEF */ |
| 239 |
0, 0, /* BRAZERO, BRAMINZERO */ |
0, 0, 0, /* BRAZERO, BRAMINZERO, BRAPOSZERO */ |
| 240 |
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */ |
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */ |
| 241 |
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */ |
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */ |
| 242 |
0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */ |
0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */ |
| 270 |
typedef struct stateblock { |
typedef struct stateblock { |
| 271 |
int offset; /* Offset to opcode */ |
int offset; /* Offset to opcode */ |
| 272 |
int count; /* Count for repeats */ |
int count; /* Count for repeats */ |
|
int ims; /* ims flag bits */ |
|
| 273 |
int data; /* Some use extra data */ |
int data; /* Some use extra data */ |
| 274 |
} stateblock; |
} stateblock; |
| 275 |
|
|
| 325 |
offsetcount size of same |
offsetcount size of same |
| 326 |
workspace vector of workspace |
workspace vector of workspace |
| 327 |
wscount size of same |
wscount size of same |
|
ims the current ims flags |
|
| 328 |
rlevel function call recursion level |
rlevel function call recursion level |
| 329 |
recursing regex recursive call level |
recursing regex recursive call level |
| 330 |
|
|
| 341 |
{ \ |
{ \ |
| 342 |
next_active_state->offset = (x); \ |
next_active_state->offset = (x); \ |
| 343 |
next_active_state->count = (y); \ |
next_active_state->count = (y); \ |
|
next_active_state->ims = ims; \ |
|
| 344 |
next_active_state++; \ |
next_active_state++; \ |
| 345 |
DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ |
DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ |
| 346 |
} \ |
} \ |
| 351 |
{ \ |
{ \ |
| 352 |
next_active_state->offset = (x); \ |
next_active_state->offset = (x); \ |
| 353 |
next_active_state->count = (y); \ |
next_active_state->count = (y); \ |
|
next_active_state->ims = ims; \ |
|
| 354 |
next_active_state->data = (z); \ |
next_active_state->data = (z); \ |
| 355 |
next_active_state++; \ |
next_active_state++; \ |
| 356 |
DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ |
DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ |
| 362 |
{ \ |
{ \ |
| 363 |
next_new_state->offset = (x); \ |
next_new_state->offset = (x); \ |
| 364 |
next_new_state->count = (y); \ |
next_new_state->count = (y); \ |
|
next_new_state->ims = ims; \ |
|
| 365 |
next_new_state++; \ |
next_new_state++; \ |
| 366 |
DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ |
DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \ |
| 367 |
} \ |
} \ |
| 372 |
{ \ |
{ \ |
| 373 |
next_new_state->offset = (x); \ |
next_new_state->offset = (x); \ |
| 374 |
next_new_state->count = (y); \ |
next_new_state->count = (y); \ |
|
next_new_state->ims = ims; \ |
|
| 375 |
next_new_state->data = (z); \ |
next_new_state->data = (z); \ |
| 376 |
next_new_state++; \ |
next_new_state++; \ |
| 377 |
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ |
DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \ |
| 390 |
int offsetcount, |
int offsetcount, |
| 391 |
int *workspace, |
int *workspace, |
| 392 |
int wscount, |
int wscount, |
|
int ims, |
|
| 393 |
int rlevel, |
int rlevel, |
| 394 |
int recursing) |
int recursing) |
| 395 |
{ |
{ |
| 437 |
new_count = 0; |
new_count = 0; |
| 438 |
|
|
| 439 |
first_op = this_start_code + 1 + LINK_SIZE + |
first_op = this_start_code + 1 + LINK_SIZE + |
| 440 |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0); |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || |
| 441 |
|
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? 2:0); |
| 442 |
|
|
| 443 |
/* The first thing in any (sub) pattern is a bracket of some sort. Push all |
/* The first thing in any (sub) pattern is a bracket of some sort. Push all |
| 444 |
the alternative states onto the list, and find out where the end is. This |
the alternative states onto the list, and find out where the end is. This |
| 537 |
else |
else |
| 538 |
{ |
{ |
| 539 |
int length = 1 + LINK_SIZE + |
int length = 1 + LINK_SIZE + |
| 540 |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0); |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || |
| 541 |
|
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? |
| 542 |
|
2:0); |
| 543 |
do |
do |
| 544 |
{ |
{ |
| 545 |
ADD_NEW((int)(end_code - start_code + length), 0); |
ADD_NEW((int)(end_code - start_code + length), 0); |
| 619 |
for (i = 0; i < active_count; i++) |
for (i = 0; i < active_count; i++) |
| 620 |
{ |
{ |
| 621 |
stateblock *current_state = active_states + i; |
stateblock *current_state = active_states + i; |
| 622 |
|
BOOL caseless = FALSE; |
| 623 |
const uschar *code; |
const uschar *code; |
| 624 |
int state_offset = current_state->offset; |
int state_offset = current_state->offset; |
| 625 |
int count, codevalue, rrc; |
int count, codevalue, rrc; |
| 631 |
else printf("0x%02x\n", c); |
else printf("0x%02x\n", c); |
| 632 |
#endif |
#endif |
| 633 |
|
|
|
/* This variable is referred to implicitly in the ADD_xxx macros. */ |
|
|
|
|
|
ims = current_state->ims; |
|
|
|
|
| 634 |
/* A negative offset is a special case meaning "hold off going to this |
/* A negative offset is a special case meaning "hold off going to this |
| 635 |
(negated) state until the number of characters in the data field have |
(negated) state until the number of characters in the data field have |
| 636 |
been skipped". */ |
been skipped". */ |
| 736 |
|
|
| 737 |
/* ========================================================================== */ |
/* ========================================================================== */ |
| 738 |
/* Reached a closing bracket. If not at the end of the pattern, carry |
/* Reached a closing bracket. If not at the end of the pattern, carry |
| 739 |
on with the next opcode. Otherwise, unless we have an empty string and |
on with the next opcode. For repeating opcodes, also add the repeat |
| 740 |
|
state. Note that KETRPOS will always be encountered at the end of the |
| 741 |
|
subpattern, because the possessive subpattern repeats are always handled |
| 742 |
|
using recursive calls. Thus, it never adds any new states. |
| 743 |
|
|
| 744 |
|
At the end of the (sub)pattern, unless we have an empty string and |
| 745 |
PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the |
PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the |
| 746 |
start of the subject, save the match data, shifting up all previous |
start of the subject, save the match data, shifting up all previous |
| 747 |
matches so we always have the longest first. */ |
matches so we always have the longest first. */ |
| 749 |
case OP_KET: |
case OP_KET: |
| 750 |
case OP_KETRMIN: |
case OP_KETRMIN: |
| 751 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 752 |
|
case OP_KETRPOS: |
| 753 |
if (code != end_code) |
if (code != end_code) |
| 754 |
{ |
{ |
| 755 |
ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); |
ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); |
| 839 |
|
|
| 840 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 841 |
case OP_CIRC: |
case OP_CIRC: |
| 842 |
|
if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) |
| 843 |
|
{ ADD_ACTIVE(state_offset + 1, 0); } |
| 844 |
|
break; |
| 845 |
|
|
| 846 |
|
/*-----------------------------------------------------------------*/ |
| 847 |
|
case OP_CIRCM: |
| 848 |
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) || |
if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) || |
| 849 |
((ims & PCRE_MULTILINE) != 0 && |
(ptr != end_subject && WAS_NEWLINE(ptr))) |
|
ptr != end_subject && |
|
|
WAS_NEWLINE(ptr))) |
|
| 850 |
{ ADD_ACTIVE(state_offset + 1, 0); } |
{ ADD_ACTIVE(state_offset + 1, 0); } |
| 851 |
break; |
break; |
| 852 |
|
|
| 853 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 854 |
case OP_EOD: |
case OP_EOD: |
| 855 |
if (ptr >= end_subject) |
if (ptr >= end_subject) |
| 856 |
{ |
{ |
| 857 |
if ((md->moptions & PCRE_PARTIAL_HARD) != 0) |
if ((md->moptions & PCRE_PARTIAL_HARD) != 0) |
| 858 |
could_continue = TRUE; |
could_continue = TRUE; |
| 859 |
else { ADD_ACTIVE(state_offset + 1, 0); } |
else { ADD_ACTIVE(state_offset + 1, 0); } |
| 861 |
break; |
break; |
| 862 |
|
|
| 863 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
|
case OP_OPT: |
|
|
ims = code[1]; |
|
|
ADD_ACTIVE(state_offset + 2, 0); |
|
|
break; |
|
|
|
|
|
/*-----------------------------------------------------------------*/ |
|
| 864 |
case OP_SOD: |
case OP_SOD: |
| 865 |
if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); } |
if (ptr == start_subject) { ADD_ACTIVE(state_offset + 1, 0); } |
| 866 |
break; |
break; |
| 905 |
could_continue = TRUE; |
could_continue = TRUE; |
| 906 |
else if (clen == 0 || |
else if (clen == 0 || |
| 907 |
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) && |
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) && |
| 908 |
((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen) |
(ptr == end_subject - md->nllen) |
| 909 |
)) |
)) |
| 910 |
{ ADD_ACTIVE(state_offset + 1, 0); } |
{ ADD_ACTIVE(state_offset + 1, 0); } |
| 911 |
} |
} |
| 912 |
else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr)) |
break; |
| 913 |
|
|
| 914 |
|
/*-----------------------------------------------------------------*/ |
| 915 |
|
case OP_DOLLM: |
| 916 |
|
if ((md->moptions & PCRE_NOTEOL) == 0) |
| 917 |
|
{ |
| 918 |
|
if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0) |
| 919 |
|
could_continue = TRUE; |
| 920 |
|
else if (clen == 0 || |
| 921 |
|
((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr))) |
| 922 |
|
{ ADD_ACTIVE(state_offset + 1, 0); } |
| 923 |
|
} |
| 924 |
|
else if (IS_NEWLINE(ptr)) |
| 925 |
{ ADD_ACTIVE(state_offset + 1, 0); } |
{ ADD_ACTIVE(state_offset + 1, 0); } |
| 926 |
break; |
break; |
| 927 |
|
|
| 1977 |
break; |
break; |
| 1978 |
|
|
| 1979 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 1980 |
case OP_CHARNC: |
case OP_CHARI: |
| 1981 |
if (clen == 0) break; |
if (clen == 0) break; |
| 1982 |
|
|
| 1983 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2163 |
break; |
break; |
| 2164 |
|
|
| 2165 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 2166 |
/* Match a negated single character. This is only used for one-byte |
/* Match a negated single character casefully. This is only used for |
| 2167 |
characters, that is, we know that d < 256. The character we are |
one-byte characters, that is, we know that d < 256. The character we are |
| 2168 |
checking (c) can be multibyte. */ |
checking (c) can be multibyte. */ |
| 2169 |
|
|
| 2170 |
case OP_NOT: |
case OP_NOT: |
| 2171 |
if (clen > 0) |
if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); } |
|
{ |
|
|
unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d; |
|
|
if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); } |
|
|
} |
|
| 2172 |
break; |
break; |
| 2173 |
|
|
| 2174 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 2175 |
|
/* Match a negated single character caselessly. This is only used for |
| 2176 |
|
one-byte characters, that is, we know that d < 256. The character we are |
| 2177 |
|
checking (c) can be multibyte. */ |
| 2178 |
|
|
| 2179 |
|
case OP_NOTI: |
| 2180 |
|
if (clen > 0 && c != d && c != fcc[d]) |
| 2181 |
|
{ ADD_NEW(state_offset + dlen + 1, 0); } |
| 2182 |
|
break; |
| 2183 |
|
|
| 2184 |
|
/*-----------------------------------------------------------------*/ |
| 2185 |
|
case OP_PLUSI: |
| 2186 |
|
case OP_MINPLUSI: |
| 2187 |
|
case OP_POSPLUSI: |
| 2188 |
|
case OP_NOTPLUSI: |
| 2189 |
|
case OP_NOTMINPLUSI: |
| 2190 |
|
case OP_NOTPOSPLUSI: |
| 2191 |
|
caseless = TRUE; |
| 2192 |
|
codevalue -= OP_STARI - OP_STAR; |
| 2193 |
|
|
| 2194 |
|
/* Fall through */ |
| 2195 |
case OP_PLUS: |
case OP_PLUS: |
| 2196 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 2197 |
case OP_POSPLUS: |
case OP_POSPLUS: |
| 2203 |
if (clen > 0) |
if (clen > 0) |
| 2204 |
{ |
{ |
| 2205 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
| 2206 |
if ((ims & PCRE_CASELESS) != 0) |
if (caseless) |
| 2207 |
{ |
{ |
| 2208 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2209 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 2231 |
break; |
break; |
| 2232 |
|
|
| 2233 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 2234 |
|
case OP_QUERYI: |
| 2235 |
|
case OP_MINQUERYI: |
| 2236 |
|
case OP_POSQUERYI: |
| 2237 |
|
case OP_NOTQUERYI: |
| 2238 |
|
case OP_NOTMINQUERYI: |
| 2239 |
|
case OP_NOTPOSQUERYI: |
| 2240 |
|
caseless = TRUE; |
| 2241 |
|
codevalue -= OP_STARI - OP_STAR; |
| 2242 |
|
/* Fall through */ |
| 2243 |
case OP_QUERY: |
case OP_QUERY: |
| 2244 |
case OP_MINQUERY: |
case OP_MINQUERY: |
| 2245 |
case OP_POSQUERY: |
case OP_POSQUERY: |
| 2250 |
if (clen > 0) |
if (clen > 0) |
| 2251 |
{ |
{ |
| 2252 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
| 2253 |
if ((ims & PCRE_CASELESS) != 0) |
if (caseless) |
| 2254 |
{ |
{ |
| 2255 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2256 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 2276 |
break; |
break; |
| 2277 |
|
|
| 2278 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 2279 |
|
case OP_STARI: |
| 2280 |
|
case OP_MINSTARI: |
| 2281 |
|
case OP_POSSTARI: |
| 2282 |
|
case OP_NOTSTARI: |
| 2283 |
|
case OP_NOTMINSTARI: |
| 2284 |
|
case OP_NOTPOSSTARI: |
| 2285 |
|
caseless = TRUE; |
| 2286 |
|
codevalue -= OP_STARI - OP_STAR; |
| 2287 |
|
/* Fall through */ |
| 2288 |
case OP_STAR: |
case OP_STAR: |
| 2289 |
case OP_MINSTAR: |
case OP_MINSTAR: |
| 2290 |
case OP_POSSTAR: |
case OP_POSSTAR: |
| 2295 |
if (clen > 0) |
if (clen > 0) |
| 2296 |
{ |
{ |
| 2297 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
| 2298 |
if ((ims & PCRE_CASELESS) != 0) |
if (caseless) |
| 2299 |
{ |
{ |
| 2300 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2301 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 2321 |
break; |
break; |
| 2322 |
|
|
| 2323 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 2324 |
|
case OP_EXACTI: |
| 2325 |
|
case OP_NOTEXACTI: |
| 2326 |
|
caseless = TRUE; |
| 2327 |
|
codevalue -= OP_STARI - OP_STAR; |
| 2328 |
|
/* Fall through */ |
| 2329 |
case OP_EXACT: |
case OP_EXACT: |
| 2330 |
case OP_NOTEXACT: |
case OP_NOTEXACT: |
| 2331 |
count = current_state->count; /* Number already matched */ |
count = current_state->count; /* Number already matched */ |
| 2332 |
if (clen > 0) |
if (clen > 0) |
| 2333 |
{ |
{ |
| 2334 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
| 2335 |
if ((ims & PCRE_CASELESS) != 0) |
if (caseless) |
| 2336 |
{ |
{ |
| 2337 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2338 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 2356 |
break; |
break; |
| 2357 |
|
|
| 2358 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 2359 |
|
case OP_UPTOI: |
| 2360 |
|
case OP_MINUPTOI: |
| 2361 |
|
case OP_POSUPTOI: |
| 2362 |
|
case OP_NOTUPTOI: |
| 2363 |
|
case OP_NOTMINUPTOI: |
| 2364 |
|
case OP_NOTPOSUPTOI: |
| 2365 |
|
caseless = TRUE; |
| 2366 |
|
codevalue -= OP_STARI - OP_STAR; |
| 2367 |
|
/* Fall through */ |
| 2368 |
case OP_UPTO: |
case OP_UPTO: |
| 2369 |
case OP_MINUPTO: |
case OP_MINUPTO: |
| 2370 |
case OP_POSUPTO: |
case OP_POSUPTO: |
| 2376 |
if (clen > 0) |
if (clen > 0) |
| 2377 |
{ |
{ |
| 2378 |
unsigned int otherd = NOTACHAR; |
unsigned int otherd = NOTACHAR; |
| 2379 |
if ((ims & PCRE_CASELESS) != 0) |
if (caseless) |
| 2380 |
{ |
{ |
| 2381 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 2382 |
if (utf8 && d >= 128) |
if (utf8 && d >= 128) |
| 2519 |
sizeof(local_offsets)/sizeof(int), /* size of same */ |
sizeof(local_offsets)/sizeof(int), /* size of same */ |
| 2520 |
local_workspace, /* workspace vector */ |
local_workspace, /* workspace vector */ |
| 2521 |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
|
ims, /* the current ims flags */ |
|
| 2522 |
rlevel, /* function recursion level */ |
rlevel, /* function recursion level */ |
| 2523 |
recursing); /* pass on regex recursion */ |
recursing); /* pass on regex recursion */ |
| 2524 |
|
|
| 2609 |
sizeof(local_offsets)/sizeof(int), /* size of same */ |
sizeof(local_offsets)/sizeof(int), /* size of same */ |
| 2610 |
local_workspace, /* workspace vector */ |
local_workspace, /* workspace vector */ |
| 2611 |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
|
ims, /* the current ims flags */ |
|
| 2612 |
rlevel, /* function recursion level */ |
rlevel, /* function recursion level */ |
| 2613 |
recursing); /* pass on regex recursion */ |
recursing); /* pass on regex recursion */ |
| 2614 |
|
|
| 2641 |
sizeof(local_offsets)/sizeof(int), /* size of same */ |
sizeof(local_offsets)/sizeof(int), /* size of same */ |
| 2642 |
local_workspace, /* workspace vector */ |
local_workspace, /* workspace vector */ |
| 2643 |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
|
ims, /* the current ims flags */ |
|
| 2644 |
rlevel, /* function recursion level */ |
rlevel, /* function recursion level */ |
| 2645 |
recursing + 1); /* regex recurse level */ |
recursing + 1); /* regex recurse level */ |
| 2646 |
|
|
| 2678 |
break; |
break; |
| 2679 |
|
|
| 2680 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 2681 |
|
case OP_BRAPOS: |
| 2682 |
|
case OP_SBRAPOS: |
| 2683 |
|
case OP_CBRAPOS: |
| 2684 |
|
case OP_SCBRAPOS: |
| 2685 |
|
case OP_BRAPOSZERO: |
| 2686 |
|
{ |
| 2687 |
|
int charcount, matched_count; |
| 2688 |
|
const uschar *local_ptr = ptr; |
| 2689 |
|
BOOL allow_zero; |
| 2690 |
|
|
| 2691 |
|
if (codevalue == OP_BRAPOSZERO) |
| 2692 |
|
{ |
| 2693 |
|
allow_zero = TRUE; |
| 2694 |
|
codevalue = *(++code); /* Codevalue will be one of above BRAs */ |
| 2695 |
|
} |
| 2696 |
|
else allow_zero = FALSE; |
| 2697 |
|
|
| 2698 |
|
/* Loop to match the subpattern as many times as possible as if it were |
| 2699 |
|
a complete pattern. */ |
| 2700 |
|
|
| 2701 |
|
for (matched_count = 0;; matched_count++) |
| 2702 |
|
{ |
| 2703 |
|
int local_offsets[2]; |
| 2704 |
|
int local_workspace[1000]; |
| 2705 |
|
|
| 2706 |
|
int rc = internal_dfa_exec( |
| 2707 |
|
md, /* fixed match data */ |
| 2708 |
|
code, /* this subexpression's code */ |
| 2709 |
|
local_ptr, /* where we currently are */ |
| 2710 |
|
(int)(ptr - start_subject), /* start offset */ |
| 2711 |
|
local_offsets, /* offset vector */ |
| 2712 |
|
sizeof(local_offsets)/sizeof(int), /* size of same */ |
| 2713 |
|
local_workspace, /* workspace vector */ |
| 2714 |
|
sizeof(local_workspace)/sizeof(int), /* size of same */ |
| 2715 |
|
rlevel, /* function recursion level */ |
| 2716 |
|
recursing); /* pass on regex recursion */ |
| 2717 |
|
|
| 2718 |
|
/* Failed to match */ |
| 2719 |
|
|
| 2720 |
|
if (rc < 0) |
| 2721 |
|
{ |
| 2722 |
|
if (rc != PCRE_ERROR_NOMATCH) return rc; |
| 2723 |
|
break; |
| 2724 |
|
} |
| 2725 |
|
|
| 2726 |
|
/* Matched: break the loop if zero characters matched. */ |
| 2727 |
|
|
| 2728 |
|
charcount = local_offsets[1] - local_offsets[0]; |
| 2729 |
|
if (charcount == 0) break; |
| 2730 |
|
local_ptr += charcount; /* Advance temporary position ptr */ |
| 2731 |
|
} |
| 2732 |
|
|
| 2733 |
|
/* At this point we have matched the subpattern matched_count |
| 2734 |
|
times, and local_ptr is pointing to the character after the end of the |
| 2735 |
|
last match. */ |
| 2736 |
|
|
| 2737 |
|
if (matched_count > 0 || allow_zero) |
| 2738 |
|
{ |
| 2739 |
|
const uschar *end_subpattern = code; |
| 2740 |
|
int next_state_offset; |
| 2741 |
|
|
| 2742 |
|
do { end_subpattern += GET(end_subpattern, 1); } |
| 2743 |
|
while (*end_subpattern == OP_ALT); |
| 2744 |
|
next_state_offset = |
| 2745 |
|
(int)(end_subpattern - start_code + LINK_SIZE + 1); |
| 2746 |
|
|
| 2747 |
|
/* Optimization: if there are no more active states, and there |
| 2748 |
|
are no new states yet set up, then skip over the subject string |
| 2749 |
|
right here, to save looping. Otherwise, set up the new state to swing |
| 2750 |
|
into action when the end of the matched substring is reached. */ |
| 2751 |
|
|
| 2752 |
|
if (i + 1 >= active_count && new_count == 0) |
| 2753 |
|
{ |
| 2754 |
|
ptr = local_ptr; |
| 2755 |
|
clen = 0; |
| 2756 |
|
ADD_NEW(next_state_offset, 0); |
| 2757 |
|
} |
| 2758 |
|
else |
| 2759 |
|
{ |
| 2760 |
|
const uschar *p = ptr; |
| 2761 |
|
const uschar *pp = local_ptr; |
| 2762 |
|
charcount = pp - p; |
| 2763 |
|
while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; |
| 2764 |
|
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); |
| 2765 |
|
} |
| 2766 |
|
} |
| 2767 |
|
} |
| 2768 |
|
break; |
| 2769 |
|
|
| 2770 |
|
/*-----------------------------------------------------------------*/ |
| 2771 |
case OP_ONCE: |
case OP_ONCE: |
| 2772 |
{ |
{ |
| 2773 |
int local_offsets[2]; |
int local_offsets[2]; |
| 2782 |
sizeof(local_offsets)/sizeof(int), /* size of same */ |
sizeof(local_offsets)/sizeof(int), /* size of same */ |
| 2783 |
local_workspace, /* workspace vector */ |
local_workspace, /* workspace vector */ |
| 2784 |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
|
ims, /* the current ims flags */ |
|
| 2785 |
rlevel, /* function recursion level */ |
rlevel, /* function recursion level */ |
| 2786 |
recursing); /* pass on regex recursion */ |
recursing); /* pass on regex recursion */ |
| 2787 |
|
|
| 2817 |
/* Optimization: if there are no more active states, and there |
/* Optimization: if there are no more active states, and there |
| 2818 |
are no new states yet set up, then skip over the subject string |
are no new states yet set up, then skip over the subject string |
| 2819 |
right here, to save looping. Otherwise, set up the new state to swing |
right here, to save looping. Otherwise, set up the new state to swing |
| 2820 |
into action when the end of the substring is reached. */ |
into action when the end of the matched substring is reached. */ |
| 2821 |
|
|
| 2822 |
else if (i + 1 >= active_count && new_count == 0) |
else if (i + 1 >= active_count && new_count == 0) |
| 2823 |
{ |
{ |
| 2847 |
if (repeat_state_offset >= 0) |
if (repeat_state_offset >= 0) |
| 2848 |
{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } |
{ ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); } |
| 2849 |
} |
} |
|
|
|
| 2850 |
} |
} |
| 2851 |
else if (rc != PCRE_ERROR_NOMATCH) return rc; |
else if (rc != PCRE_ERROR_NOMATCH) return rc; |
| 2852 |
} |
} |
| 3123 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 3124 |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
| 3125 |
{ |
{ |
| 3126 |
if (_pcre_valid_utf8((uschar *)subject, length) >= 0) |
int errorcode; |
| 3127 |
return PCRE_ERROR_BADUTF8; |
int tb = _pcre_valid_utf8((uschar *)subject, length, &errorcode); |
| 3128 |
|
if (tb >= 0) |
| 3129 |
|
{ |
| 3130 |
|
if (offsetcount >= 2) |
| 3131 |
|
{ |
| 3132 |
|
offsets[0] = tb; |
| 3133 |
|
offsets[1] = errorcode; |
| 3134 |
|
} |
| 3135 |
|
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)? |
| 3136 |
|
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
| 3137 |
|
} |
| 3138 |
if (start_offset > 0 && start_offset < length) |
if (start_offset > 0 && start_offset < length) |
| 3139 |
{ |
{ |
| 3140 |
int tb = ((USPTR)subject)[start_offset] & 0xc0; |
tb = ((USPTR)subject)[start_offset] & 0xc0; |
| 3141 |
if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET; |
if (tb == 0x80) return PCRE_ERROR_BADUTF8_OFFSET; |
| 3142 |
} |
} |
| 3143 |
} |
} |
| 3225 |
|
|
| 3226 |
/* There are some optimizations that avoid running the match if a known |
/* There are some optimizations that avoid running the match if a known |
| 3227 |
starting point is not found. However, there is an option that disables |
starting point is not found. However, there is an option that disables |
| 3228 |
these, for testing and for ensuring that all callouts do actually occur. */ |
these, for testing and for ensuring that all callouts do actually occur. |
| 3229 |
|
The option can be set in the regex by (*NO_START_OPT) or passed in |
| 3230 |
|
match-time options. */ |
| 3231 |
|
|
| 3232 |
if ((options & PCRE_NO_START_OPTIMIZE) == 0) |
if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0) |
| 3233 |
{ |
{ |
| 3234 |
/* Advance to a known first byte. */ |
/* Advance to a known first byte. */ |
| 3235 |
|
|
| 3387 |
offsetcount, /* size of same */ |
offsetcount, /* size of same */ |
| 3388 |
workspace, /* workspace vector */ |
workspace, /* workspace vector */ |
| 3389 |
wscount, /* size of same */ |
wscount, /* size of same */ |
|
re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */ |
|
| 3390 |
0, /* function recurse level */ |
0, /* function recurse level */ |
| 3391 |
0); /* regex recurse level */ |
0); /* regex recurse level */ |
| 3392 |
|
|