| 241 |
/* 10 */ |
/* 10 */ |
| 242 |
"operand of unlimited repeat could match the empty string\0" /** DEAD **/ |
"operand of unlimited repeat could match the empty string\0" /** DEAD **/ |
| 243 |
"internal error: unexpected repeat\0" |
"internal error: unexpected repeat\0" |
| 244 |
"unrecognized character after (?\0" |
"unrecognized character after (? or (?-\0" |
| 245 |
"POSIX named classes are supported only within a class\0" |
"POSIX named classes are supported only within a class\0" |
| 246 |
"missing )\0" |
"missing )\0" |
| 247 |
/* 15 */ |
/* 15 */ |
| 300 |
"(*VERB) with an argument is not supported\0" |
"(*VERB) with an argument is not supported\0" |
| 301 |
/* 60 */ |
/* 60 */ |
| 302 |
"(*VERB) not recognized\0" |
"(*VERB) not recognized\0" |
| 303 |
"number is too big"; |
"number is too big\0" |
| 304 |
|
"subpattern name expected\0" |
| 305 |
|
"digit expected after (?+"; |
| 306 |
|
|
| 307 |
|
|
| 308 |
/* Table to identify digits and hex digits. This is used when compiling |
/* Table to identify digits and hex digits. This is used when compiling |
| 498 |
|
|
| 499 |
if (c == 0) *errorcodeptr = ERR1; |
if (c == 0) *errorcodeptr = ERR1; |
| 500 |
|
|
| 501 |
/* Non-alphamerics are literals. For digits or letters, do an initial lookup in |
/* Non-alphanumerics are literals. For digits or letters, do an initial lookup |
| 502 |
a table. A non-zero result is something that can be returned immediately. |
in a table. A non-zero result is something that can be returned immediately. |
| 503 |
Otherwise further processing may be required. */ |
Otherwise further processing may be required. */ |
| 504 |
|
|
| 505 |
#ifndef EBCDIC /* ASCII coding */ |
#ifndef EBCDIC /* ASCII coding */ |
| 506 |
else if (c < '0' || c > 'z') {} /* Not alphameric */ |
else if (c < '0' || c > 'z') {} /* Not alphanumeric */ |
| 507 |
else if ((i = escapes[c - '0']) != 0) c = i; |
else if ((i = escapes[c - '0']) != 0) c = i; |
| 508 |
|
|
| 509 |
#else /* EBCDIC coding */ |
#else /* EBCDIC coding */ |
| 510 |
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */ |
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphanumeric */ |
| 511 |
else if ((i = escapes[c - 0x48]) != 0) c = i; |
else if ((i = escapes[c - 0x48]) != 0) c = i; |
| 512 |
#endif |
#endif |
| 513 |
|
|
| 724 |
break; |
break; |
| 725 |
|
|
| 726 |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
| 727 |
other alphameric following \ is an error if PCRE_EXTRA was set; otherwise, |
other alphanumeric following \ is an error if PCRE_EXTRA was set; |
| 728 |
for Perl compatibility, it is a literal. This code looks a bit odd, but |
otherwise, for Perl compatibility, it is a literal. This code looks a bit |
| 729 |
there used to be some cases other than the default, and there may be again |
odd, but there used to be some cases other than the default, and there may |
| 730 |
in future, so I haven't "optimized" it. */ |
be again in future, so I haven't "optimized" it. */ |
| 731 |
|
|
| 732 |
default: |
default: |
| 733 |
if ((options & PCRE_EXTRA) != 0) switch(c) |
if ((options & PCRE_EXTRA) != 0) switch(c) |
| 1508 |
can match the empty string or not. It is called from could_be_empty() |
can match the empty string or not. It is called from could_be_empty() |
| 1509 |
below and from compile_branch() when checking for an unlimited repeat of a |
below and from compile_branch() when checking for an unlimited repeat of a |
| 1510 |
group that can match nothing. Note that first_significant_code() skips over |
group that can match nothing. Note that first_significant_code() skips over |
| 1511 |
assertions. If we hit an unclosed bracket, we return "empty" - this means we've |
backward and negative forward assertions when its final argument is TRUE. If we |
| 1512 |
struck an inner bracket whose current branch will already have been scanned. |
hit an unclosed bracket, we return "empty" - this means we've struck an inner |
| 1513 |
|
bracket whose current branch will already have been scanned. |
| 1514 |
|
|
| 1515 |
Arguments: |
Arguments: |
| 1516 |
code points to start of search |
code points to start of search |
| 1532 |
|
|
| 1533 |
c = *code; |
c = *code; |
| 1534 |
|
|
| 1535 |
|
/* Skip over forward assertions; the other assertions are skipped by |
| 1536 |
|
first_significant_code() with a TRUE final argument. */ |
| 1537 |
|
|
| 1538 |
|
if (c == OP_ASSERT) |
| 1539 |
|
{ |
| 1540 |
|
do code += GET(code, 1); while (*code == OP_ALT); |
| 1541 |
|
c = *code; |
| 1542 |
|
continue; |
| 1543 |
|
} |
| 1544 |
|
|
| 1545 |
/* Groups with zero repeats can of course be empty; skip them. */ |
/* Groups with zero repeats can of course be empty; skip them. */ |
| 1546 |
|
|
| 1547 |
if (c == OP_BRAZERO || c == OP_BRAMINZERO) |
if (c == OP_BRAZERO || c == OP_BRAMINZERO) |
| 2396 |
for (;; ptr++) |
for (;; ptr++) |
| 2397 |
{ |
{ |
| 2398 |
BOOL negate_class; |
BOOL negate_class; |
| 2399 |
BOOL should_flip_negation; |
BOOL should_flip_negation; |
| 2400 |
BOOL possessive_quantifier; |
BOOL possessive_quantifier; |
| 2401 |
BOOL is_quantifier; |
BOOL is_quantifier; |
| 2402 |
BOOL is_recurse; |
BOOL is_recurse; |
| 2645 |
else break; |
else break; |
| 2646 |
} |
} |
| 2647 |
|
|
| 2648 |
/* If a class contains a negative special such as \S, we need to flip the |
/* If a class contains a negative special such as \S, we need to flip the |
| 2649 |
negation flag at the end, so that support for characters > 255 works |
negation flag at the end, so that support for characters > 255 works |
| 2650 |
correctly (they are all included in the class). */ |
correctly (they are all included in the class). */ |
| 2651 |
|
|
| 2652 |
should_flip_negation = FALSE; |
should_flip_negation = FALSE; |
| 2723 |
if (*ptr == '^') |
if (*ptr == '^') |
| 2724 |
{ |
{ |
| 2725 |
local_negate = TRUE; |
local_negate = TRUE; |
| 2726 |
|
should_flip_negation = TRUE; /* Note negative special */ |
| 2727 |
ptr++; |
ptr++; |
| 2728 |
} |
} |
| 2729 |
|
|
| 2798 |
c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE); |
c = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE); |
| 2799 |
if (*errorcodeptr != 0) goto FAILED; |
if (*errorcodeptr != 0) goto FAILED; |
| 2800 |
|
|
| 2801 |
if (-c == ESC_b) c = '\b'; /* \b is backslash in a class */ |
if (-c == ESC_b) c = '\b'; /* \b is backspace in a class */ |
| 2802 |
else if (-c == ESC_X) c = 'X'; /* \X is literal X in a class */ |
else if (-c == ESC_X) c = 'X'; /* \X is literal X in a class */ |
| 2803 |
else if (-c == ESC_R) c = 'R'; /* \R is literal R in a class */ |
else if (-c == ESC_R) c = 'R'; /* \R is literal R in a class */ |
| 2804 |
else if (-c == ESC_Q) /* Handle start of quoted string */ |
else if (-c == ESC_Q) /* Handle start of quoted string */ |
| 2826 |
continue; |
continue; |
| 2827 |
|
|
| 2828 |
case ESC_D: |
case ESC_D: |
| 2829 |
should_flip_negation = TRUE; |
should_flip_negation = TRUE; |
| 2830 |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_digit]; |
| 2831 |
continue; |
continue; |
| 2832 |
|
|
| 2835 |
continue; |
continue; |
| 2836 |
|
|
| 2837 |
case ESC_W: |
case ESC_W: |
| 2838 |
should_flip_negation = TRUE; |
should_flip_negation = TRUE; |
| 2839 |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word]; |
| 2840 |
continue; |
continue; |
| 2841 |
|
|
| 2845 |
continue; |
continue; |
| 2846 |
|
|
| 2847 |
case ESC_S: |
case ESC_S: |
| 2848 |
should_flip_negation = TRUE; |
should_flip_negation = TRUE; |
| 2849 |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; |
for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_space]; |
| 2850 |
classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */ |
classbits[1] |= 0x08; /* Perl 5.004 onwards omits VT from \s */ |
| 2851 |
continue; |
continue; |
| 2852 |
|
|
|
case ESC_E: /* Perl ignores an orphan \E */ |
|
|
continue; |
|
|
|
|
| 2853 |
default: /* Not recognized; fall through */ |
default: /* Not recognized; fall through */ |
| 2854 |
break; /* Need "default" setting to stop compiler warning. */ |
break; /* Need "default" setting to stop compiler warning. */ |
| 2855 |
} |
} |
| 3084 |
d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE); |
d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE); |
| 3085 |
if (*errorcodeptr != 0) goto FAILED; |
if (*errorcodeptr != 0) goto FAILED; |
| 3086 |
|
|
| 3087 |
/* \b is backslash; \X is literal X; \R is literal R; any other |
/* \b is backspace; \X is literal X; \R is literal R; any other |
| 3088 |
special means the '-' was literal */ |
special means the '-' was literal */ |
| 3089 |
|
|
| 3090 |
if (d < 0) |
if (d < 0) |
| 3348 |
zeroreqbyte = reqbyte; |
zeroreqbyte = reqbyte; |
| 3349 |
|
|
| 3350 |
/* If there are characters with values > 255, we have to compile an |
/* If there are characters with values > 255, we have to compile an |
| 3351 |
extended class, with its own opcode, unless there was a negated special |
extended class, with its own opcode, unless there was a negated special |
| 3352 |
such as \S in the class, because in that case all characters > 255 are in |
such as \S in the class, because in that case all characters > 255 are in |
| 3353 |
the class, so any that were explicitly given as well can be ignored. If |
the class, so any that were explicitly given as well can be ignored. If |
| 3354 |
(when there are explicit characters > 255 that must be listed) there are no |
(when there are explicit characters > 255 that must be listed) there are no |
| 3355 |
characters < 256, we can omit the bitmap in the actual compiled code. */ |
characters < 256, we can omit the bitmap in the actual compiled code. */ |
| 3356 |
|
|
| 3381 |
} |
} |
| 3382 |
#endif |
#endif |
| 3383 |
|
|
| 3384 |
/* If there are no characters > 255, set the opcode to OP_CLASS or |
/* If there are no characters > 255, set the opcode to OP_CLASS or |
| 3385 |
OP_NCLASS, depending on whether the whole class was negated and whether |
OP_NCLASS, depending on whether the whole class was negated and whether |
| 3386 |
there were negative specials such as \S in the class. Then copy the 32-byte |
there were negative specials such as \S in the class. Then copy the 32-byte |
| 3387 |
map into the code vector, negating it if necessary. */ |
map into the code vector, negating it if necessary. */ |
| 3388 |
|
|
| 3389 |
*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; |
*code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS; |
| 3390 |
if (negate_class) |
if (negate_class) |
| 3391 |
{ |
{ |
| 4029 |
int len; |
int len; |
| 4030 |
if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT || |
if (*tempcode == OP_EXACT || *tempcode == OP_TYPEEXACT || |
| 4031 |
*tempcode == OP_NOTEXACT) |
*tempcode == OP_NOTEXACT) |
| 4032 |
tempcode += _pcre_OP_lengths[*tempcode]; |
tempcode += _pcre_OP_lengths[*tempcode] + |
| 4033 |
|
((*tempcode == OP_TYPEEXACT && |
| 4034 |
|
(tempcode[3] == OP_PROP || tempcode[3] == OP_NOTPROP))? 2:0); |
| 4035 |
len = code - tempcode; |
len = code - tempcode; |
| 4036 |
if (len > 0) switch (*tempcode) |
if (len > 0) switch (*tempcode) |
| 4037 |
{ |
{ |
| 4258 |
*errorcodeptr = ERR58; |
*errorcodeptr = ERR58; |
| 4259 |
goto FAILED; |
goto FAILED; |
| 4260 |
} |
} |
| 4261 |
if (refsign == '-') |
recno = (refsign == '-')? |
| 4262 |
|
cd->bracount - recno + 1 : recno +cd->bracount; |
| 4263 |
|
if (recno <= 0 || recno > cd->final_bracount) |
| 4264 |
{ |
{ |
| 4265 |
recno = cd->bracount - recno + 1; |
*errorcodeptr = ERR15; |
| 4266 |
if (recno <= 0) |
goto FAILED; |
|
{ |
|
|
*errorcodeptr = ERR15; |
|
|
goto FAILED; |
|
|
} |
|
| 4267 |
} |
} |
|
else recno += cd->bracount; |
|
| 4268 |
PUT2(code, 2+LINK_SIZE, recno); |
PUT2(code, 2+LINK_SIZE, recno); |
| 4269 |
break; |
break; |
| 4270 |
} |
} |
| 4336 |
skipbytes = 1; |
skipbytes = 1; |
| 4337 |
} |
} |
| 4338 |
|
|
| 4339 |
/* Check for the "name" actually being a subpattern number. */ |
/* Check for the "name" actually being a subpattern number. We are |
| 4340 |
|
in the second pass here, so final_bracount is set. */ |
| 4341 |
|
|
| 4342 |
else if (recno > 0) |
else if (recno > 0 && recno <= cd->final_bracount) |
| 4343 |
{ |
{ |
| 4344 |
PUT2(code, 2+LINK_SIZE, recno); |
PUT2(code, 2+LINK_SIZE, recno); |
| 4345 |
} |
} |
| 4533 |
|
|
| 4534 |
/* We come here from the Python syntax above that handles both |
/* We come here from the Python syntax above that handles both |
| 4535 |
references (?P=name) and recursion (?P>name), as well as falling |
references (?P=name) and recursion (?P>name), as well as falling |
| 4536 |
through from the Perl recursion syntax (?&name). */ |
through from the Perl recursion syntax (?&name). We also come here from |
| 4537 |
|
the Perl \k<name> or \k'name' back reference syntax and the \k{name} |
| 4538 |
|
.NET syntax. */ |
| 4539 |
|
|
| 4540 |
NAMED_REF_OR_RECURSE: |
NAMED_REF_OR_RECURSE: |
| 4541 |
name = ++ptr; |
name = ++ptr; |
| 4547 |
|
|
| 4548 |
if (lengthptr != NULL) |
if (lengthptr != NULL) |
| 4549 |
{ |
{ |
| 4550 |
|
if (namelen == 0) |
| 4551 |
|
{ |
| 4552 |
|
*errorcodeptr = ERR62; |
| 4553 |
|
goto FAILED; |
| 4554 |
|
} |
| 4555 |
if (*ptr != terminator) |
if (*ptr != terminator) |
| 4556 |
{ |
{ |
| 4557 |
*errorcodeptr = ERR42; |
*errorcodeptr = ERR42; |
| 4565 |
recno = 0; |
recno = 0; |
| 4566 |
} |
} |
| 4567 |
|
|
| 4568 |
/* In the real compile, seek the name in the table */ |
/* In the real compile, seek the name in the table. We check the name |
| 4569 |
|
first, and then check that we have reached the end of the name in the |
| 4570 |
|
table. That way, if the name is longer than any in the table, |
| 4571 |
|
the comparison will fail without reading beyond the table entry. */ |
| 4572 |
|
|
| 4573 |
else |
else |
| 4574 |
{ |
{ |
| 4575 |
slot = cd->name_table; |
slot = cd->name_table; |
| 4576 |
for (i = 0; i < cd->names_found; i++) |
for (i = 0; i < cd->names_found; i++) |
| 4577 |
{ |
{ |
| 4578 |
if (strncmp((char *)name, (char *)slot+2, namelen) == 0) break; |
if (strncmp((char *)name, (char *)slot+2, namelen) == 0 && |
| 4579 |
|
slot[2+namelen] == 0) |
| 4580 |
|
break; |
| 4581 |
slot += cd->name_entry_size; |
slot += cd->name_entry_size; |
| 4582 |
} |
} |
| 4583 |
|
|
| 4614 |
{ |
{ |
| 4615 |
const uschar *called; |
const uschar *called; |
| 4616 |
|
|
| 4617 |
if ((refsign = *ptr) == '+') ptr++; |
if ((refsign = *ptr) == '+') |
| 4618 |
|
{ |
| 4619 |
|
ptr++; |
| 4620 |
|
if ((digitab[*ptr] & ctype_digit) == 0) |
| 4621 |
|
{ |
| 4622 |
|
*errorcodeptr = ERR63; |
| 4623 |
|
goto FAILED; |
| 4624 |
|
} |
| 4625 |
|
} |
| 4626 |
else if (refsign == '-') |
else if (refsign == '-') |
| 4627 |
{ |
{ |
| 4628 |
if ((digitab[ptr[1]] & ctype_digit) == 0) |
if ((digitab[ptr[1]] & ctype_digit) == 0) |
| 5949 |
no longer needed, so hopefully this workspace will never overflow, though there |
no longer needed, so hopefully this workspace will never overflow, though there |
| 5950 |
is a test for its doing so. */ |
is a test for its doing so. */ |
| 5951 |
|
|
| 5952 |
cd->bracount = 0; |
cd->bracount = cd->final_bracount = 0; |
| 5953 |
cd->names_found = 0; |
cd->names_found = 0; |
| 5954 |
cd->name_entry_size = 0; |
cd->name_entry_size = 0; |
| 5955 |
cd->name_table = NULL; |
cd->name_table = NULL; |
| 6026 |
field; this time it's used for remembering forward references to subpatterns. |
field; this time it's used for remembering forward references to subpatterns. |
| 6027 |
*/ |
*/ |
| 6028 |
|
|
| 6029 |
|
cd->final_bracount = cd->bracount; /* Save for checking forward references */ |
| 6030 |
cd->bracount = 0; |
cd->bracount = 0; |
| 6031 |
cd->names_found = 0; |
cd->names_found = 0; |
| 6032 |
cd->name_table = (uschar *)re + re->name_table_offset; |
cd->name_table = (uschar *)re + re->name_table_offset; |