| 540 |
{ |
{ |
| 541 |
int length = 1 + LINK_SIZE + |
int length = 1 + LINK_SIZE + |
| 542 |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || |
((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA || |
| 543 |
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? |
*this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)? |
| 544 |
2:0); |
2:0); |
| 545 |
do |
do |
| 546 |
{ |
{ |
| 621 |
for (i = 0; i < active_count; i++) |
for (i = 0; i < active_count; i++) |
| 622 |
{ |
{ |
| 623 |
stateblock *current_state = active_states + i; |
stateblock *current_state = active_states + i; |
| 624 |
BOOL caseless = FALSE; |
BOOL caseless = FALSE; |
| 625 |
const uschar *code; |
const uschar *code; |
| 626 |
int state_offset = current_state->offset; |
int state_offset = current_state->offset; |
| 627 |
int count, codevalue, rrc; |
int count, codevalue, rrc; |
| 738 |
|
|
| 739 |
/* ========================================================================== */ |
/* ========================================================================== */ |
| 740 |
/* Reached a closing bracket. If not at the end of the pattern, carry |
/* Reached a closing bracket. If not at the end of the pattern, carry |
| 741 |
on with the next opcode. For repeating opcodes, also add the repeat |
on with the next opcode. For repeating opcodes, also add the repeat |
| 742 |
state. Note that KETRPOS will always be encountered at the end of the |
state. Note that KETRPOS will always be encountered at the end of the |
| 743 |
subpattern, because the possessive subpattern repeats are always handled |
subpattern, because the possessive subpattern repeats are always handled |
| 744 |
using recursive calls. Thus, it never adds any new states. |
using recursive calls. Thus, it never adds any new states. |
| 745 |
|
|
| 746 |
At the end of the (sub)pattern, unless we have an empty string and |
At the end of the (sub)pattern, unless we have an empty string and |
| 747 |
PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the |
PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the |
| 748 |
start of the subject, save the match data, shifting up all previous |
start of the subject, save the match data, shifting up all previous |
| 751 |
case OP_KET: |
case OP_KET: |
| 752 |
case OP_KETRMIN: |
case OP_KETRMIN: |
| 753 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 754 |
case OP_KETRPOS: |
case OP_KETRPOS: |
| 755 |
if (code != end_code) |
if (code != end_code) |
| 756 |
{ |
{ |
| 757 |
ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); |
ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0); |
| 2179 |
checking (c) can be multibyte. */ |
checking (c) can be multibyte. */ |
| 2180 |
|
|
| 2181 |
case OP_NOTI: |
case OP_NOTI: |
| 2182 |
if (clen > 0 && c != d && c != fcc[d]) |
if (clen > 0 && c != d && c != fcc[d]) |
| 2183 |
{ ADD_NEW(state_offset + dlen + 1, 0); } |
{ ADD_NEW(state_offset + dlen + 1, 0); } |
| 2184 |
break; |
break; |
| 2185 |
|
|
| 2192 |
case OP_NOTPOSPLUSI: |
case OP_NOTPOSPLUSI: |
| 2193 |
caseless = TRUE; |
caseless = TRUE; |
| 2194 |
codevalue -= OP_STARI - OP_STAR; |
codevalue -= OP_STARI - OP_STAR; |
| 2195 |
|
|
| 2196 |
/* Fall through */ |
/* Fall through */ |
| 2197 |
case OP_PLUS: |
case OP_PLUS: |
| 2198 |
case OP_MINPLUS: |
case OP_MINPLUS: |
| 2560 |
cb.capture_top = 1; |
cb.capture_top = 1; |
| 2561 |
cb.capture_last = -1; |
cb.capture_last = -1; |
| 2562 |
cb.callout_data = md->callout_data; |
cb.callout_data = md->callout_data; |
| 2563 |
cb.mark = NULL; /* No (*MARK) support */ |
cb.mark = NULL; /* No (*MARK) support */ |
| 2564 |
if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ |
if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ |
| 2565 |
} |
} |
| 2566 |
if (rrc > 0) break; /* Fail this thread */ |
if (rrc > 0) break; /* Fail this thread */ |
| 2587 |
{ |
{ |
| 2588 |
int value = GET2(code, LINK_SIZE+2); |
int value = GET2(code, LINK_SIZE+2); |
| 2589 |
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND; |
if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND; |
| 2590 |
if (md->recursive != NULL) |
if (md->recursive != NULL) |
| 2591 |
{ ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); } |
{ ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); } |
| 2592 |
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } |
else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); } |
| 2593 |
} |
} |
| 2626 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 2627 |
case OP_RECURSE: |
case OP_RECURSE: |
| 2628 |
{ |
{ |
| 2629 |
dfa_recursion_info *ri; |
dfa_recursion_info *ri; |
| 2630 |
int local_offsets[1000]; |
int local_offsets[1000]; |
| 2631 |
int local_workspace[1000]; |
int local_workspace[1000]; |
| 2632 |
const uschar *callpat = start_code + GET(code, 1); |
const uschar *callpat = start_code + GET(code, 1); |
| 2633 |
int recno = (callpat == md->start_code)? 0 : |
int recno = (callpat == md->start_code)? 0 : |
| 2634 |
GET2(callpat, 1 + LINK_SIZE); |
GET2(callpat, 1 + LINK_SIZE); |
| 2635 |
int rc; |
int rc; |
| 2636 |
|
|
| 2637 |
DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP)); |
DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP)); |
| 2638 |
|
|
| 2639 |
/* Check for repeating a recursion without advancing the subject |
/* Check for repeating a recursion without advancing the subject |
| 2640 |
pointer. This should catch convoluted mutual recursions. (Some simple |
pointer. This should catch convoluted mutual recursions. (Some simple |
| 2641 |
cases are caught at compile time.) */ |
cases are caught at compile time.) */ |
|
|
|
|
for (ri = md->recursive; ri != NULL; ri = ri->prevrec) |
|
|
if (recno == ri->group_num && ptr == ri->subject_position) |
|
|
return PCRE_ERROR_RECURSELOOP; |
|
| 2642 |
|
|
| 2643 |
/* Remember this recursion and where we started it so as to |
for (ri = md->recursive; ri != NULL; ri = ri->prevrec) |
| 2644 |
|
if (recno == ri->group_num && ptr == ri->subject_position) |
| 2645 |
|
return PCRE_ERROR_RECURSELOOP; |
| 2646 |
|
|
| 2647 |
|
/* Remember this recursion and where we started it so as to |
| 2648 |
catch infinite loops. */ |
catch infinite loops. */ |
| 2649 |
|
|
| 2650 |
new_recursive.group_num = recno; |
new_recursive.group_num = recno; |
| 2651 |
new_recursive.subject_position = ptr; |
new_recursive.subject_position = ptr; |
| 2652 |
new_recursive.prevrec = md->recursive; |
new_recursive.prevrec = md->recursive; |
| 2653 |
md->recursive = &new_recursive; |
md->recursive = &new_recursive; |
| 2654 |
|
|
| 2655 |
rc = internal_dfa_exec( |
rc = internal_dfa_exec( |
| 2656 |
md, /* fixed match data */ |
md, /* fixed match data */ |
| 2665 |
|
|
| 2666 |
md->recursive = new_recursive.prevrec; /* Done this recursion */ |
md->recursive = new_recursive.prevrec; /* Done this recursion */ |
| 2667 |
|
|
| 2668 |
DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP, |
DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP, |
| 2669 |
rc)); |
rc)); |
| 2670 |
|
|
| 2671 |
/* Ran out of internal offsets */ |
/* Ran out of internal offsets */ |
| 2703 |
case OP_SBRAPOS: |
case OP_SBRAPOS: |
| 2704 |
case OP_CBRAPOS: |
case OP_CBRAPOS: |
| 2705 |
case OP_SCBRAPOS: |
case OP_SCBRAPOS: |
| 2706 |
case OP_BRAPOSZERO: |
case OP_BRAPOSZERO: |
| 2707 |
{ |
{ |
| 2708 |
int charcount, matched_count; |
int charcount, matched_count; |
| 2709 |
const uschar *local_ptr = ptr; |
const uschar *local_ptr = ptr; |
| 2710 |
BOOL allow_zero; |
BOOL allow_zero; |
| 2711 |
|
|
| 2712 |
if (codevalue == OP_BRAPOSZERO) |
if (codevalue == OP_BRAPOSZERO) |
| 2713 |
{ |
{ |
| 2714 |
allow_zero = TRUE; |
allow_zero = TRUE; |
| 2715 |
codevalue = *(++code); /* Codevalue will be one of above BRAs */ |
codevalue = *(++code); /* Codevalue will be one of above BRAs */ |
| 2716 |
} |
} |
| 2717 |
else allow_zero = FALSE; |
else allow_zero = FALSE; |
| 2718 |
|
|
| 2719 |
/* Loop to match the subpattern as many times as possible as if it were |
/* Loop to match the subpattern as many times as possible as if it were |
| 2720 |
a complete pattern. */ |
a complete pattern. */ |
| 2721 |
|
|
| 2722 |
for (matched_count = 0;; matched_count++) |
for (matched_count = 0;; matched_count++) |
| 2723 |
{ |
{ |
| 2724 |
int local_offsets[2]; |
int local_offsets[2]; |
| 2725 |
int local_workspace[1000]; |
int local_workspace[1000]; |
| 2726 |
|
|
| 2727 |
int rc = internal_dfa_exec( |
int rc = internal_dfa_exec( |
| 2728 |
md, /* fixed match data */ |
md, /* fixed match data */ |
| 2729 |
code, /* this subexpression's code */ |
code, /* this subexpression's code */ |
| 2734 |
local_workspace, /* workspace vector */ |
local_workspace, /* workspace vector */ |
| 2735 |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
sizeof(local_workspace)/sizeof(int), /* size of same */ |
| 2736 |
rlevel); /* function recursion level */ |
rlevel); /* function recursion level */ |
| 2737 |
|
|
| 2738 |
/* Failed to match */ |
/* Failed to match */ |
| 2739 |
|
|
| 2740 |
if (rc < 0) |
if (rc < 0) |
| 2741 |
{ |
{ |
| 2742 |
if (rc != PCRE_ERROR_NOMATCH) return rc; |
if (rc != PCRE_ERROR_NOMATCH) return rc; |
| 2743 |
break; |
break; |
| 2744 |
} |
} |
| 2745 |
|
|
| 2746 |
/* Matched: break the loop if zero characters matched. */ |
/* Matched: break the loop if zero characters matched. */ |
| 2747 |
|
|
| 2748 |
charcount = local_offsets[1] - local_offsets[0]; |
charcount = local_offsets[1] - local_offsets[0]; |
| 2749 |
if (charcount == 0) break; |
if (charcount == 0) break; |
| 2750 |
local_ptr += charcount; /* Advance temporary position ptr */ |
local_ptr += charcount; /* Advance temporary position ptr */ |
| 2751 |
} |
} |
| 2752 |
|
|
| 2753 |
/* At this point we have matched the subpattern matched_count |
/* At this point we have matched the subpattern matched_count |
| 2754 |
times, and local_ptr is pointing to the character after the end of the |
times, and local_ptr is pointing to the character after the end of the |
| 2755 |
last match. */ |
last match. */ |
| 2756 |
|
|
| 2757 |
if (matched_count > 0 || allow_zero) |
if (matched_count > 0 || allow_zero) |
| 2758 |
{ |
{ |
| 2759 |
const uschar *end_subpattern = code; |
const uschar *end_subpattern = code; |
| 2760 |
int next_state_offset; |
int next_state_offset; |
| 2761 |
|
|
| 2762 |
do { end_subpattern += GET(end_subpattern, 1); } |
do { end_subpattern += GET(end_subpattern, 1); } |
| 2763 |
while (*end_subpattern == OP_ALT); |
while (*end_subpattern == OP_ALT); |
| 2764 |
next_state_offset = |
next_state_offset = |
| 2779 |
{ |
{ |
| 2780 |
const uschar *p = ptr; |
const uschar *p = ptr; |
| 2781 |
const uschar *pp = local_ptr; |
const uschar *pp = local_ptr; |
| 2782 |
charcount = pp - p; |
charcount = pp - p; |
| 2783 |
while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; |
while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--; |
| 2784 |
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); |
ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1)); |
| 2785 |
} |
} |
| 2786 |
} |
} |
| 2787 |
} |
} |
| 2788 |
break; |
break; |
| 2789 |
|
|
| 2790 |
/*-----------------------------------------------------------------*/ |
/*-----------------------------------------------------------------*/ |
| 2791 |
case OP_ONCE: |
case OP_ONCE: |
| 2792 |
{ |
{ |
| 2892 |
cb.capture_top = 1; |
cb.capture_top = 1; |
| 2893 |
cb.capture_last = -1; |
cb.capture_last = -1; |
| 2894 |
cb.callout_data = md->callout_data; |
cb.callout_data = md->callout_data; |
| 2895 |
cb.mark = NULL; /* No (*MARK) support */ |
cb.mark = NULL; /* No (*MARK) support */ |
| 2896 |
if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ |
if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc; /* Abandon */ |
| 2897 |
} |
} |
| 2898 |
if (rrc == 0) |
if (rrc == 0) |
| 3143 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 3144 |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
| 3145 |
{ |
{ |
| 3146 |
int erroroffset; |
int erroroffset; |
| 3147 |
int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset); |
int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset); |
| 3148 |
if (errorcode != 0) |
if (errorcode != 0) |
| 3149 |
{ |
{ |
| 3151 |
{ |
{ |
| 3152 |
offsets[0] = erroroffset; |
offsets[0] = erroroffset; |
| 3153 |
offsets[1] = errorcode; |
offsets[1] = errorcode; |
| 3154 |
} |
} |
| 3155 |
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)? |
return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)? |
| 3156 |
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8; |
| 3157 |
} |
} |
| 3158 |
if (start_offset > 0 && start_offset < length && |
if (start_offset > 0 && start_offset < length && |
| 3159 |
(((USPTR)subject)[start_offset] & 0xc0) == 0x80) |
(((USPTR)subject)[start_offset] & 0xc0) == 0x80) |
| 3160 |
return PCRE_ERROR_BADUTF8_OFFSET; |
return PCRE_ERROR_BADUTF8_OFFSET; |
| 3161 |
} |
} |
| 3162 |
#endif |
#endif |
| 3395 |
/* OK, now we can do the business */ |
/* OK, now we can do the business */ |
| 3396 |
|
|
| 3397 |
md->start_used_ptr = current_subject; |
md->start_used_ptr = current_subject; |
| 3398 |
md->recursive = NULL; |
md->recursive = NULL; |
| 3399 |
|
|
| 3400 |
rc = internal_dfa_exec( |
rc = internal_dfa_exec( |
| 3401 |
md, /* fixed match data */ |
md, /* fixed match data */ |