| 109 |
character that is to be tested in some way. This makes it possible to |
character that is to be tested in some way. This makes it possible to |
| 110 |
centralize the loading of these characters. In the case of Type * etc, the |
centralize the loading of these characters. In the case of Type * etc, the |
| 111 |
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a |
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a |
| 112 |
small value. ***NOTE*** If the start of this table is modified, the two tables |
small value. Non-zero values in the table are the offsets from the opcode where |
| 113 |
that follow must also be modified. */ |
the character is to be found. ***NOTE*** If the start of this table is |
| 114 |
|
modified, the three tables that follow must also be modified. */ |
| 115 |
|
|
| 116 |
static const uschar coptable[] = { |
static const uschar coptable[] = { |
| 117 |
0, /* End */ |
0, /* End */ |
| 161 |
0, /* DEF */ |
0, /* DEF */ |
| 162 |
0, 0, /* BRAZERO, BRAMINZERO */ |
0, 0, /* BRAZERO, BRAMINZERO */ |
| 163 |
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */ |
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */ |
| 164 |
0, 0, 0 /* FAIL, ACCEPT, SKIPZERO */ |
0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */ |
| 165 |
|
}; |
| 166 |
|
|
| 167 |
|
/* This table identifies those opcodes that inspect a character. It is used to |
| 168 |
|
remember the fact that a character could have been inspected when the end of |
| 169 |
|
the subject is reached, in order to support PCRE_PARTIAL_HARD behaviour. |
| 170 |
|
***NOTE*** If the start of this table is modified, the two tables that follow |
| 171 |
|
must also be modified. */ |
| 172 |
|
|
| 173 |
|
static const uschar poptable[] = { |
| 174 |
|
0, /* End */ |
| 175 |
|
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ |
| 176 |
|
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ |
| 177 |
|
1, 1, 1, /* Any, AllAny, Anybyte */ |
| 178 |
|
1, 1, 1, /* NOTPROP, PROP, EXTUNI */ |
| 179 |
|
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ |
| 180 |
|
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ |
| 181 |
|
1, /* Char */ |
| 182 |
|
1, /* Charnc */ |
| 183 |
|
1, /* not */ |
| 184 |
|
/* Positive single-char repeats */ |
| 185 |
|
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ |
| 186 |
|
1, 1, 1, /* upto, minupto, exact */ |
| 187 |
|
1, 1, 1, 1, /* *+, ++, ?+, upto+ */ |
| 188 |
|
/* Negative single-char repeats - only for chars < 256 */ |
| 189 |
|
1, 1, 1, 1, 1, 1, /* NOT *, *?, +, +?, ?, ?? */ |
| 190 |
|
1, 1, 1, /* NOT upto, minupto, exact */ |
| 191 |
|
1, 1, 1, 1, /* NOT *+, ++, ?+, upto+ */ |
| 192 |
|
/* Positive type repeats */ |
| 193 |
|
1, 1, 1, 1, 1, 1, /* Type *, *?, +, +?, ?, ?? */ |
| 194 |
|
1, 1, 1, /* Type upto, minupto, exact */ |
| 195 |
|
1, 1, 1, 1, /* Type *+, ++, ?+, upto+ */ |
| 196 |
|
/* Character class & ref repeats */ |
| 197 |
|
1, 1, 1, 1, 1, 1, /* *, *?, +, +?, ?, ?? */ |
| 198 |
|
1, 1, /* CRRANGE, CRMINRANGE */ |
| 199 |
|
1, /* CLASS */ |
| 200 |
|
1, /* NCLASS */ |
| 201 |
|
1, /* XCLASS - variable length */ |
| 202 |
|
0, /* REF */ |
| 203 |
|
0, /* RECURSE */ |
| 204 |
|
0, /* CALLOUT */ |
| 205 |
|
0, /* Alt */ |
| 206 |
|
0, /* Ket */ |
| 207 |
|
0, /* KetRmax */ |
| 208 |
|
0, /* KetRmin */ |
| 209 |
|
0, /* Assert */ |
| 210 |
|
0, /* Assert not */ |
| 211 |
|
0, /* Assert behind */ |
| 212 |
|
0, /* Assert behind not */ |
| 213 |
|
0, /* Reverse */ |
| 214 |
|
0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */ |
| 215 |
|
0, 0, 0, /* SBRA, SCBRA, SCOND */ |
| 216 |
|
0, /* CREF */ |
| 217 |
|
0, /* RREF */ |
| 218 |
|
0, /* DEF */ |
| 219 |
|
0, 0, /* BRAZERO, BRAMINZERO */ |
| 220 |
|
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */ |
| 221 |
|
0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */ |
| 222 |
}; |
}; |
| 223 |
|
|
| 224 |
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, |
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, |
| 547 |
unsigned int c, d; |
unsigned int c, d; |
| 548 |
int forced_fail = 0; |
int forced_fail = 0; |
| 549 |
int reached_end = 0; |
int reached_end = 0; |
| 550 |
|
BOOL could_continue = FALSE; |
| 551 |
|
|
| 552 |
/* Make the new state list into the active state list and empty the |
/* Make the new state list into the active state list and empty the |
| 553 |
new state list. */ |
new state list. */ |
| 655 |
|
|
| 656 |
code = start_code + state_offset; |
code = start_code + state_offset; |
| 657 |
codevalue = *code; |
codevalue = *code; |
| 658 |
|
|
| 659 |
|
/* If this opcode inspects a character, but we are at the end of the |
| 660 |
|
subject, remember the fact so that we can support PCRE_PARTIAL_HARD. */ |
| 661 |
|
|
| 662 |
|
if (clen == 0 && poptable[codevalue] != 0) |
| 663 |
|
could_continue = TRUE; |
| 664 |
|
|
| 665 |
/* If this opcode is followed by an inline character, load it. It is |
/* If this opcode is followed by an inline character, load it. It is |
| 666 |
tempting to test for the presence of a subject character here, but that |
tempting to test for the presence of a subject character here, but that |
| 2587 |
/* We have finished the processing at the current subject character. If no |
/* We have finished the processing at the current subject character. If no |
| 2588 |
new states have been set for the next character, we have found all the |
new states have been set for the next character, we have found all the |
| 2589 |
matches that we are going to find. If we are at the top level and partial |
matches that we are going to find. If we are at the top level and partial |
| 2590 |
matching has been requested, check for appropriate conditions. The "forced_ |
matching has been requested, check for appropriate conditions. |
| 2591 |
fail" variable counts the number of (*F) encountered for the character. If it |
|
| 2592 |
is equal to the original active_count (saved in workspace[1]) it means that |
The "forced_fail" variable counts the number of (*F) encountered for the |
| 2593 |
(*F) was found on every active state. In this case we don't want to give a |
character. If it is equal to the original active_count (saved in |
| 2594 |
partial match. */ |
workspace[1]) it means that (*F) was found on every active state. In this |
| 2595 |
|
case we don't want to give a partial match. |
| 2596 |
|
|
| 2597 |
|
The "reached_end" variable counts the number of threads that have reached the |
| 2598 |
|
end of the pattern. The "could_continue" variable is true if a thread could |
| 2599 |
|
have continued but for the fact that the end of the subject was reached. */ |
| 2600 |
|
|
| 2601 |
if (new_count <= 0) |
if (new_count <= 0) |
| 2602 |
{ |
{ |
| 2603 |
if (rlevel == 1 && /* Top level, and */ |
if (rlevel == 1 && /* Top level, and */ |
| 2604 |
reached_end != workspace[1] && /* Not all reached end */ |
( /* either... */ |
| 2605 |
|
reached_end != workspace[1] || /* Not all reached end */ |
| 2606 |
|
could_continue /* or some could go on */ |
| 2607 |
|
) && /* and... */ |
| 2608 |
forced_fail != workspace[1] && /* Not all forced fail & */ |
forced_fail != workspace[1] && /* Not all forced fail & */ |
| 2609 |
( /* either... */ |
( /* either... */ |
| 2610 |
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */ |
(md->moptions & PCRE_PARTIAL_HARD) != 0 /* Hard partial */ |