| 7 |
below for why this module is different). |
below for why this module is different). |
| 8 |
|
|
| 9 |
Written by Philip Hazel |
Written by Philip Hazel |
| 10 |
Copyright (c) 1997-2009 University of Cambridge |
Copyright (c) 1997-2010 University of Cambridge |
| 11 |
|
|
| 12 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 13 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 106 |
|
|
| 107 |
|
|
| 108 |
/* This table identifies those opcodes that are followed immediately by a |
/* This table identifies those opcodes that are followed immediately by a |
| 109 |
character that is to be tested in some way. This makes is possible to |
character that is to be tested in some way. This makes it possible to |
| 110 |
centralize the loading of these characters. In the case of Type * etc, the |
centralize the loading of these characters. In the case of Type * etc, the |
| 111 |
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a |
"character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a |
| 112 |
small value. Non-zero values in the table are the offsets from the opcode where |
small value. Non-zero values in the table are the offsets from the opcode where |
| 118 |
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ |
0, 0, 0, 0, 0, /* \A, \G, \K, \B, \b */ |
| 119 |
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ |
0, 0, 0, 0, 0, 0, /* \D, \d, \S, \s, \W, \w */ |
| 120 |
0, 0, 0, /* Any, AllAny, Anybyte */ |
0, 0, 0, /* Any, AllAny, Anybyte */ |
| 121 |
0, 0, 0, /* NOTPROP, PROP, EXTUNI */ |
0, 0, /* \P, \p */ |
| 122 |
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */ |
0, 0, 0, 0, 0, /* \R, \H, \h, \V, \v */ |
| 123 |
|
0, /* \X */ |
| 124 |
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ |
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ |
| 125 |
1, /* Char */ |
1, /* Char */ |
| 126 |
1, /* Charnc */ |
1, /* Charnc */ |
| 157 |
0, /* Reverse */ |
0, /* Reverse */ |
| 158 |
0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */ |
0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */ |
| 159 |
0, 0, 0, /* SBRA, SCBRA, SCOND */ |
0, 0, 0, /* SBRA, SCBRA, SCOND */ |
| 160 |
0, /* CREF */ |
0, 0, /* CREF, NCREF */ |
| 161 |
0, /* RREF */ |
0, 0, /* RREF, NRREF */ |
| 162 |
0, /* DEF */ |
0, /* DEF */ |
| 163 |
0, 0, /* BRAZERO, BRAMINZERO */ |
0, 0, /* BRAZERO, BRAMINZERO */ |
| 164 |
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */ |
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */ |
| 165 |
0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */ |
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */ |
| 166 |
|
0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */ |
| 167 |
}; |
}; |
| 168 |
|
|
| 169 |
/* This table identifies those opcodes that inspect a character. It is used to |
/* This table identifies those opcodes that inspect a character. It is used to |
| 176 |
0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */ |
0, 0, 0, 1, 1, /* \A, \G, \K, \B, \b */ |
| 177 |
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ |
1, 1, 1, 1, 1, 1, /* \D, \d, \S, \s, \W, \w */ |
| 178 |
1, 1, 1, /* Any, AllAny, Anybyte */ |
1, 1, 1, /* Any, AllAny, Anybyte */ |
| 179 |
1, 1, 1, /* NOTPROP, PROP, EXTUNI */ |
1, 1, /* \P, \p */ |
| 180 |
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ |
1, 1, 1, 1, 1, /* \R, \H, \h, \V, \v */ |
| 181 |
|
1, /* \X */ |
| 182 |
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ |
0, 0, 0, 0, 0, /* \Z, \z, Opt, ^, $ */ |
| 183 |
1, /* Char */ |
1, /* Char */ |
| 184 |
1, /* Charnc */ |
1, /* Charnc */ |
| 215 |
0, /* Reverse */ |
0, /* Reverse */ |
| 216 |
0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */ |
0, 0, 0, 0, /* ONCE, BRA, CBRA, COND */ |
| 217 |
0, 0, 0, /* SBRA, SCBRA, SCOND */ |
0, 0, 0, /* SBRA, SCBRA, SCOND */ |
| 218 |
0, /* CREF */ |
0, 0, /* CREF, NCREF */ |
| 219 |
0, /* RREF */ |
0, 0, /* RREF, NRREF */ |
| 220 |
0, /* DEF */ |
0, /* DEF */ |
| 221 |
0, 0, /* BRAZERO, BRAMINZERO */ |
0, 0, /* BRAZERO, BRAMINZERO */ |
| 222 |
0, 0, 0, 0, /* PRUNE, SKIP, THEN, COMMIT */ |
0, 0, 0, /* MARK, PRUNE, PRUNE_ARG, */ |
| 223 |
0, 0, 0, 0 /* FAIL, ACCEPT, CLOSE, SKIPZERO */ |
0, 0, 0, 0, /* SKIP, SKIP_ARG, THEN, THEN_ARG, */ |
| 224 |
|
0, 0, 0, 0, 0 /* COMMIT, FAIL, ACCEPT, CLOSE, SKIPZERO */ |
| 225 |
}; |
}; |
| 226 |
|
|
| 227 |
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, |
/* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W, |
| 259 |
#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int)) |
#define INTS_PER_STATEBLOCK (sizeof(stateblock)/sizeof(int)) |
| 260 |
|
|
| 261 |
|
|
| 262 |
#ifdef DEBUG |
#ifdef PCRE_DEBUG |
| 263 |
/************************************************* |
/************************************************* |
| 264 |
* Print character string * |
* Print character string * |
| 265 |
*************************************************/ |
*************************************************/ |
| 563 |
workspace[0] ^= 1; /* Remember for the restarting feature */ |
workspace[0] ^= 1; /* Remember for the restarting feature */ |
| 564 |
workspace[1] = active_count; |
workspace[1] = active_count; |
| 565 |
|
|
| 566 |
#ifdef DEBUG |
#ifdef PCRE_DEBUG |
| 567 |
printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); |
printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP); |
| 568 |
pchars((uschar *)ptr, strlen((char *)ptr), stdout); |
pchars((uschar *)ptr, strlen((char *)ptr), stdout); |
| 569 |
printf("\"\n"); |
printf("\"\n"); |
| 609 |
int state_offset = current_state->offset; |
int state_offset = current_state->offset; |
| 610 |
int count, codevalue, rrc; |
int count, codevalue, rrc; |
| 611 |
|
|
| 612 |
#ifdef DEBUG |
#ifdef PCRE_DEBUG |
| 613 |
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); |
printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset); |
| 614 |
if (clen == 0) printf("EOL\n"); |
if (clen == 0) printf("EOL\n"); |
| 615 |
else if (c > 32 && c < 127) printf("'%c'\n", c); |
else if (c > 32 && c < 127) printf("'%c'\n", c); |
| 710 |
|
|
| 711 |
switch (codevalue) |
switch (codevalue) |
| 712 |
{ |
{ |
| 713 |
|
/* ========================================================================== */ |
| 714 |
|
/* These cases are never obeyed. This is a fudge that causes a compile- |
| 715 |
|
time error if the vectors coptable or poptable, which are indexed by |
| 716 |
|
opcode, are not the correct length. It seems to be the only way to do |
| 717 |
|
such a check at compile time, as the sizeof() operator does not work |
| 718 |
|
in the C preprocessor. */ |
| 719 |
|
|
| 720 |
|
case OP_TABLE_LENGTH: |
| 721 |
|
case OP_TABLE_LENGTH + |
| 722 |
|
((sizeof(coptable) == OP_TABLE_LENGTH) && |
| 723 |
|
(sizeof(poptable) == OP_TABLE_LENGTH)): |
| 724 |
|
break; |
| 725 |
|
|
| 726 |
/* ========================================================================== */ |
/* ========================================================================== */ |
| 727 |
/* Reached a closing bracket. If not at the end of the pattern, carry |
/* Reached a closing bracket. If not at the end of the pattern, carry |
| 2315 |
rlevel, /* function recursion level */ |
rlevel, /* function recursion level */ |
| 2316 |
recursing); /* pass on regex recursion */ |
recursing); /* pass on regex recursion */ |
| 2317 |
|
|
| 2318 |
|
if (rc == PCRE_ERROR_DFA_UITEM) return rc; |
| 2319 |
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) |
if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK)) |
| 2320 |
{ ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); } |
{ ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); } |
| 2321 |
} |
} |
| 2406 |
rlevel, /* function recursion level */ |
rlevel, /* function recursion level */ |
| 2407 |
recursing); /* pass on regex recursion */ |
recursing); /* pass on regex recursion */ |
| 2408 |
|
|
| 2409 |
|
if (rc == PCRE_ERROR_DFA_UITEM) return rc; |
| 2410 |
if ((rc >= 0) == |
if ((rc >= 0) == |
| 2411 |
(condcode == OP_ASSERT || condcode == OP_ASSERTBACK)) |
(condcode == OP_ASSERT || condcode == OP_ASSERTBACK)) |
| 2412 |
{ ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); } |
{ ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); } |
| 3007 |
bytes to avoid spending too much time in this optimization. */ |
bytes to avoid spending too much time in this optimization. */ |
| 3008 |
|
|
| 3009 |
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && |
if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 && |
| 3010 |
end_subject - current_subject < study->minlength) |
(pcre_uint32)(end_subject - current_subject) < study->minlength) |
| 3011 |
return PCRE_ERROR_NOMATCH; |
return PCRE_ERROR_NOMATCH; |
| 3012 |
|
|
| 3013 |
/* If req_byte is set, we know that that character must appear in the |
/* If req_byte is set, we know that that character must appear in the |