| 9 |
|
|
| 10 |
Written by: Philip Hazel <ph10@cam.ac.uk> |
Written by: Philip Hazel <ph10@cam.ac.uk> |
| 11 |
|
|
| 12 |
Copyright (c) 1997-1999 University of Cambridge |
Copyright (c) 1997-2000 University of Cambridge |
| 13 |
|
|
| 14 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 15 |
Permission is granted to anyone to use this software for any purpose on any |
Permission is granted to anyone to use this software for any purpose on any |
| 66 |
#define BRASTACK_SIZE 200 |
#define BRASTACK_SIZE 200 |
| 67 |
|
|
| 68 |
|
|
| 69 |
|
/* The number of bytes in a literal character string above which we can't add |
| 70 |
|
any more is different when UTF-8 characters may be encountered. */ |
| 71 |
|
|
| 72 |
|
#ifdef SUPPORT_UTF8 |
| 73 |
|
#define MAXLIT 250 |
| 74 |
|
#else |
| 75 |
|
#define MAXLIT 255 |
| 76 |
|
#endif |
| 77 |
|
|
| 78 |
|
|
| 79 |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
| 80 |
|
|
| 81 |
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
static const char rep_min[] = { 0, 0, 1, 1, 0, 0 }; |
| 92 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 93 |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", "{", |
| 94 |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
"*", "*?", "+", "+?", "?", "??", "{", "{", |
| 95 |
"class", "Ref", |
"class", "Ref", "Recurse", |
| 96 |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", |
"Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", |
| 97 |
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", |
"AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref", |
| 98 |
"Brazero", "Braminzero", "Bra" |
"Brazero", "Braminzero", "Bra" |
| 117 |
0, 0, -ESC_z /* x - z */ |
0, 0, -ESC_z /* x - z */ |
| 118 |
}; |
}; |
| 119 |
|
|
| 120 |
|
/* Tables of names of POSIX character classes and their lengths. The list is |
| 121 |
|
terminated by a zero length entry. The first three must be alpha, lower, upper, |
| 122 |
|
as this is assumed for handling case independence. */ |
| 123 |
|
|
| 124 |
|
static const char *posix_names[] = { |
| 125 |
|
"alpha", "lower", "upper", |
| 126 |
|
"alnum", "ascii", "cntrl", "digit", "graph", |
| 127 |
|
"print", "punct", "space", "word", "xdigit" }; |
| 128 |
|
|
| 129 |
|
static const uschar posix_name_lengths[] = { |
| 130 |
|
5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 }; |
| 131 |
|
|
| 132 |
|
/* Table of class bit maps for each POSIX class; up to three may be combined |
| 133 |
|
to form the class. */ |
| 134 |
|
|
| 135 |
|
static const int posix_class_maps[] = { |
| 136 |
|
cbit_lower, cbit_upper, -1, /* alpha */ |
| 137 |
|
cbit_lower, -1, -1, /* lower */ |
| 138 |
|
cbit_upper, -1, -1, /* upper */ |
| 139 |
|
cbit_digit, cbit_lower, cbit_upper, /* alnum */ |
| 140 |
|
cbit_print, cbit_cntrl, -1, /* ascii */ |
| 141 |
|
cbit_cntrl, -1, -1, /* cntrl */ |
| 142 |
|
cbit_digit, -1, -1, /* digit */ |
| 143 |
|
cbit_graph, -1, -1, /* graph */ |
| 144 |
|
cbit_print, -1, -1, /* print */ |
| 145 |
|
cbit_punct, -1, -1, /* punct */ |
| 146 |
|
cbit_space, -1, -1, /* space */ |
| 147 |
|
cbit_word, -1, -1, /* word */ |
| 148 |
|
cbit_xdigit,-1, -1 /* xdigit */ |
| 149 |
|
}; |
| 150 |
|
|
| 151 |
|
|
| 152 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
| 153 |
|
|
| 154 |
static BOOL |
static BOOL |
| 155 |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
compile_regex(int, int, int *, uschar **, const uschar **, const char **, |
| 156 |
BOOL, int, compile_data *); |
BOOL, int, int *, int *, compile_data *); |
| 157 |
|
|
| 158 |
|
/* Structure for building a chain of data that actually lives on the |
| 159 |
|
stack, for holding the values of the subject pointer at the start of each |
| 160 |
|
subpattern, so as to detect when an empty string has been matched by a |
| 161 |
|
subpattern - to break infinite loops. */ |
| 162 |
|
|
| 163 |
|
typedef struct eptrblock { |
| 164 |
|
struct eptrblock *prev; |
| 165 |
|
const uschar *saved_eptr; |
| 166 |
|
} eptrblock; |
| 167 |
|
|
| 168 |
|
/* Flag bits for the match() function */ |
| 169 |
|
|
| 170 |
|
#define match_condassert 0x01 /* Called to check a condition assertion */ |
| 171 |
|
#define match_isgroup 0x02 /* Set if start of bracketed group */ |
| 172 |
|
|
| 173 |
|
|
| 174 |
|
|
| 186 |
|
|
| 187 |
|
|
| 188 |
|
|
| 189 |
|
/************************************************* |
| 190 |
|
* Macros and tables for character handling * |
| 191 |
|
*************************************************/ |
| 192 |
|
|
| 193 |
|
/* When UTF-8 encoding is being used, a character is no longer just a single |
| 194 |
|
byte. The macros for character handling generate simple sequences when used in |
| 195 |
|
byte-mode, and more complicated ones for UTF-8 characters. */ |
| 196 |
|
|
| 197 |
|
#ifndef SUPPORT_UTF8 |
| 198 |
|
#define GETCHARINC(c, eptr) c = *eptr++; |
| 199 |
|
#define GETCHARLEN(c, eptr, len) c = *eptr; |
| 200 |
|
#define BACKCHAR(eptr) |
| 201 |
|
|
| 202 |
|
#else /* SUPPORT_UTF8 */ |
| 203 |
|
|
| 204 |
|
/* Get the next UTF-8 character, advancing the pointer */ |
| 205 |
|
|
| 206 |
|
#define GETCHARINC(c, eptr) \ |
| 207 |
|
c = *eptr++; \ |
| 208 |
|
if (md->utf8 && (c & 0xc0) == 0xc0) \ |
| 209 |
|
{ \ |
| 210 |
|
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ |
| 211 |
|
int s = 6 - a; /* Amount to shift next byte */ \ |
| 212 |
|
c &= utf8_table3[a]; /* Low order bits from first byte */ \ |
| 213 |
|
while (a-- > 0) \ |
| 214 |
|
{ \ |
| 215 |
|
c |= (*eptr++ & 0x3f) << s; \ |
| 216 |
|
s += 6; \ |
| 217 |
|
} \ |
| 218 |
|
} |
| 219 |
|
|
| 220 |
|
/* Get the next UTF-8 character, not advancing the pointer, setting length */ |
| 221 |
|
|
| 222 |
|
#define GETCHARLEN(c, eptr, len) \ |
| 223 |
|
c = *eptr; \ |
| 224 |
|
len = 1; \ |
| 225 |
|
if (md->utf8 && (c & 0xc0) == 0xc0) \ |
| 226 |
|
{ \ |
| 227 |
|
int i; \ |
| 228 |
|
int a = utf8_table4[c & 0x3f]; /* Number of additional bytes */ \ |
| 229 |
|
int s = 6 - a; /* Amount to shift next byte */ \ |
| 230 |
|
c &= utf8_table3[a]; /* Low order bits from first byte */ \ |
| 231 |
|
for (i = 1; i <= a; i++) \ |
| 232 |
|
{ \ |
| 233 |
|
c |= (eptr[i] & 0x3f) << s; \ |
| 234 |
|
s += 6; \ |
| 235 |
|
} \ |
| 236 |
|
len += a; \ |
| 237 |
|
} |
| 238 |
|
|
| 239 |
|
/* If the pointer is not at the start of a character, move it back until |
| 240 |
|
it is. */ |
| 241 |
|
|
| 242 |
|
#define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--; |
| 243 |
|
|
| 244 |
|
#endif |
| 245 |
|
|
| 246 |
|
|
| 247 |
|
|
| 248 |
/************************************************* |
/************************************************* |
| 249 |
* Default character tables * |
* Default character tables * |
| 259 |
|
|
| 260 |
|
|
| 261 |
|
|
| 262 |
|
#ifdef SUPPORT_UTF8 |
| 263 |
|
/************************************************* |
| 264 |
|
* Tables for UTF-8 support * |
| 265 |
|
*************************************************/ |
| 266 |
|
|
| 267 |
|
/* These are the breakpoints for different numbers of bytes in a UTF-8 |
| 268 |
|
character. */ |
| 269 |
|
|
| 270 |
|
static int utf8_table1[] = { 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff}; |
| 271 |
|
|
| 272 |
|
/* These are the indicator bits and the mask for the data bits to set in the |
| 273 |
|
first byte of a character, indexed by the number of additional bytes. */ |
| 274 |
|
|
| 275 |
|
static int utf8_table2[] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc}; |
| 276 |
|
static int utf8_table3[] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01}; |
| 277 |
|
|
| 278 |
|
/* Table of the number of extra bytes, indexed by the first character |
| 279 |
|
masked with 0x3f. The highest number for a valid UTF-8 character is in fact |
| 280 |
|
0x3d. */ |
| 281 |
|
|
| 282 |
|
static uschar utf8_table4[] = { |
| 283 |
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 284 |
|
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, |
| 285 |
|
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, |
| 286 |
|
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 }; |
| 287 |
|
|
| 288 |
|
|
| 289 |
|
/************************************************* |
| 290 |
|
* Convert character value to UTF-8 * |
| 291 |
|
*************************************************/ |
| 292 |
|
|
| 293 |
|
/* This function takes an integer value in the range 0 - 0x7fffffff |
| 294 |
|
and encodes it as a UTF-8 character in 1 to 6 bytes. |
| 295 |
|
|
| 296 |
|
Arguments: |
| 297 |
|
cvalue the character value |
| 298 |
|
buffer pointer to buffer for result - at least 6 bytes long |
| 299 |
|
|
| 300 |
|
Returns: number of bytes placed in the buffer |
| 301 |
|
*/ |
| 302 |
|
|
| 303 |
|
static int |
| 304 |
|
ord2utf8(int cvalue, uschar *buffer) |
| 305 |
|
{ |
| 306 |
|
register int i, j; |
| 307 |
|
for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) |
| 308 |
|
if (cvalue <= utf8_table1[i]) break; |
| 309 |
|
*buffer++ = utf8_table2[i] | (cvalue & utf8_table3[i]); |
| 310 |
|
cvalue >>= 6 - i; |
| 311 |
|
for (j = 0; j < i; j++) |
| 312 |
|
{ |
| 313 |
|
*buffer++ = 0x80 | (cvalue & 0x3f); |
| 314 |
|
cvalue >>= 6; |
| 315 |
|
} |
| 316 |
|
return i + 1; |
| 317 |
|
} |
| 318 |
|
#endif |
| 319 |
|
|
| 320 |
|
|
| 321 |
|
|
| 322 |
/************************************************* |
/************************************************* |
| 323 |
* Return version string * |
* Return version string * |
| 324 |
*************************************************/ |
*************************************************/ |
| 325 |
|
|
| 326 |
|
#define STRING(a) # a |
| 327 |
|
#define XSTRING(s) STRING(s) |
| 328 |
|
|
| 329 |
const char * |
const char * |
| 330 |
pcre_version(void) |
pcre_version(void) |
| 331 |
{ |
{ |
| 332 |
return PCRE_VERSION; |
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE); |
| 333 |
} |
} |
| 334 |
|
|
| 335 |
|
|
| 336 |
|
|
| 337 |
|
|
| 338 |
/************************************************* |
/************************************************* |
| 339 |
* Return info about a compiled pattern * |
* (Obsolete) Return info about compiled pattern * |
| 340 |
*************************************************/ |
*************************************************/ |
| 341 |
|
|
| 342 |
/* This function picks potentially useful data out of the private |
/* This is the original "info" function. It picks potentially useful data out |
| 343 |
structure. |
of the private structure, but its interface was too rigid. It remains for |
| 344 |
|
backwards compatibility. The public options are passed back in an int - though |
| 345 |
|
the re->options field has been expanded to a long int, all the public options |
| 346 |
|
are at the low end of it, and so even on 16-bit systems this will still be OK. |
| 347 |
|
Therefore, I haven't changed the API for pcre_info(). |
| 348 |
|
|
| 349 |
Arguments: |
Arguments: |
| 350 |
external_re points to compiled code |
external_re points to compiled code |
| 353 |
or -1 if multiline and all branches start ^, |
or -1 if multiline and all branches start ^, |
| 354 |
or -2 otherwise |
or -2 otherwise |
| 355 |
|
|
| 356 |
Returns: number of identifying extraction brackets |
Returns: number of capturing subpatterns |
| 357 |
or negative values on error |
or negative values on error |
| 358 |
*/ |
*/ |
| 359 |
|
|
| 363 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
| 364 |
if (re == NULL) return PCRE_ERROR_NULL; |
if (re == NULL) return PCRE_ERROR_NULL; |
| 365 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
| 366 |
if (optptr != NULL) *optptr = (re->options & PUBLIC_OPTIONS); |
if (optptr != NULL) *optptr = (int)(re->options & PUBLIC_OPTIONS); |
| 367 |
if (first_char != NULL) |
if (first_char != NULL) |
| 368 |
*first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char : |
*first_char = ((re->options & PCRE_FIRSTSET) != 0)? re->first_char : |
| 369 |
((re->options & PCRE_STARTLINE) != 0)? -1 : -2; |
((re->options & PCRE_STARTLINE) != 0)? -1 : -2; |
| 372 |
|
|
| 373 |
|
|
| 374 |
|
|
| 375 |
|
/************************************************* |
| 376 |
|
* Return info about compiled pattern * |
| 377 |
|
*************************************************/ |
| 378 |
|
|
| 379 |
|
/* This is a newer "info" function which has an extensible interface so |
| 380 |
|
that additional items can be added compatibly. |
| 381 |
|
|
| 382 |
|
Arguments: |
| 383 |
|
external_re points to compiled code |
| 384 |
|
external_study points to study data, or NULL |
| 385 |
|
what what information is required |
| 386 |
|
where where to put the information |
| 387 |
|
|
| 388 |
|
Returns: 0 if data returned, negative on error |
| 389 |
|
*/ |
| 390 |
|
|
| 391 |
|
int |
| 392 |
|
pcre_fullinfo(const pcre *external_re, const pcre_extra *study_data, int what, |
| 393 |
|
void *where) |
| 394 |
|
{ |
| 395 |
|
const real_pcre *re = (const real_pcre *)external_re; |
| 396 |
|
const real_pcre_extra *study = (const real_pcre_extra *)study_data; |
| 397 |
|
|
| 398 |
|
if (re == NULL || where == NULL) return PCRE_ERROR_NULL; |
| 399 |
|
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
| 400 |
|
|
| 401 |
|
switch (what) |
| 402 |
|
{ |
| 403 |
|
case PCRE_INFO_OPTIONS: |
| 404 |
|
*((unsigned long int *)where) = re->options & PUBLIC_OPTIONS; |
| 405 |
|
break; |
| 406 |
|
|
| 407 |
|
case PCRE_INFO_SIZE: |
| 408 |
|
*((size_t *)where) = re->size; |
| 409 |
|
break; |
| 410 |
|
|
| 411 |
|
case PCRE_INFO_CAPTURECOUNT: |
| 412 |
|
*((int *)where) = re->top_bracket; |
| 413 |
|
break; |
| 414 |
|
|
| 415 |
|
case PCRE_INFO_BACKREFMAX: |
| 416 |
|
*((int *)where) = re->top_backref; |
| 417 |
|
break; |
| 418 |
|
|
| 419 |
|
case PCRE_INFO_FIRSTCHAR: |
| 420 |
|
*((int *)where) = |
| 421 |
|
((re->options & PCRE_FIRSTSET) != 0)? re->first_char : |
| 422 |
|
((re->options & PCRE_STARTLINE) != 0)? -1 : -2; |
| 423 |
|
break; |
| 424 |
|
|
| 425 |
|
case PCRE_INFO_FIRSTTABLE: |
| 426 |
|
*((const uschar **)where) = |
| 427 |
|
(study != NULL && (study->options & PCRE_STUDY_MAPPED) != 0)? |
| 428 |
|
study->start_bits : NULL; |
| 429 |
|
break; |
| 430 |
|
|
| 431 |
|
case PCRE_INFO_LASTLITERAL: |
| 432 |
|
*((int *)where) = |
| 433 |
|
((re->options & PCRE_REQCHSET) != 0)? re->req_char : -1; |
| 434 |
|
break; |
| 435 |
|
|
| 436 |
|
default: return PCRE_ERROR_BADOPTION; |
| 437 |
|
} |
| 438 |
|
|
| 439 |
|
return 0; |
| 440 |
|
} |
| 441 |
|
|
| 442 |
|
|
| 443 |
|
|
| 444 |
#ifdef DEBUG |
#ifdef DEBUG |
| 445 |
/************************************************* |
/************************************************* |
| 477 |
|
|
| 478 |
/* This function is called when a \ has been encountered. It either returns a |
/* This function is called when a \ has been encountered. It either returns a |
| 479 |
positive value for a simple escape such as \n, or a negative value which |
positive value for a simple escape such as \n, or a negative value which |
| 480 |
encodes one of the more complicated things such as \d. On entry, ptr is |
encodes one of the more complicated things such as \d. When UTF-8 is enabled, |
| 481 |
pointing at the \. On exit, it is on the final character of the escape |
a positive value greater than 255 may be returned. On entry, ptr is pointing at |
| 482 |
sequence. |
the \. On exit, it is on the final character of the escape sequence. |
| 483 |
|
|
| 484 |
Arguments: |
Arguments: |
| 485 |
ptrptr points to the pattern position pointer |
ptrptr points to the pattern position pointer |
| 499 |
int options, BOOL isclass, compile_data *cd) |
int options, BOOL isclass, compile_data *cd) |
| 500 |
{ |
{ |
| 501 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
| 502 |
int c = *(++ptr) & 255; /* Ensure > 0 on signed-char systems */ |
int c, i; |
| 503 |
int i; |
|
| 504 |
|
/* If backslash is at the end of the pattern, it's an error. */ |
| 505 |
|
|
| 506 |
|
c = *(++ptr); |
| 507 |
if (c == 0) *errorptr = ERR1; |
if (c == 0) *errorptr = ERR1; |
| 508 |
|
|
| 509 |
/* Digits or letters may have special meaning; all others are literals. */ |
/* Digits or letters may have special meaning; all others are literals. */ |
| 563 |
} |
} |
| 564 |
|
|
| 565 |
/* \0 always starts an octal number, but we may drop through to here with a |
/* \0 always starts an octal number, but we may drop through to here with a |
| 566 |
larger first octal digit */ |
larger first octal digit. */ |
| 567 |
|
|
| 568 |
case '0': |
case '0': |
| 569 |
c -= '0'; |
c -= '0'; |
| 570 |
while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && |
while(i++ < 2 && (cd->ctypes[ptr[1]] & ctype_digit) != 0 && |
| 571 |
ptr[1] != '8' && ptr[1] != '9') |
ptr[1] != '8' && ptr[1] != '9') |
| 572 |
c = c * 8 + *(++ptr) - '0'; |
c = c * 8 + *(++ptr) - '0'; |
| 573 |
|
c &= 255; /* Take least significant 8 bits */ |
| 574 |
break; |
break; |
| 575 |
|
|
| 576 |
/* Special escapes not starting with a digit are straightforward */ |
/* \x is complicated when UTF-8 is enabled. \x{ddd} is a character number |
| 577 |
|
which can be greater than 0xff, but only if the ddd are hex digits. */ |
| 578 |
|
|
| 579 |
case 'x': |
case 'x': |
| 580 |
|
#ifdef SUPPORT_UTF8 |
| 581 |
|
if (ptr[1] == '{' && (options & PCRE_UTF8) != 0) |
| 582 |
|
{ |
| 583 |
|
const uschar *pt = ptr + 2; |
| 584 |
|
register int count = 0; |
| 585 |
|
c = 0; |
| 586 |
|
while ((cd->ctypes[*pt] & ctype_xdigit) != 0) |
| 587 |
|
{ |
| 588 |
|
count++; |
| 589 |
|
c = c * 16 + cd->lcc[*pt] - |
| 590 |
|
(((cd->ctypes[*pt] & ctype_digit) != 0)? '0' : 'W'); |
| 591 |
|
pt++; |
| 592 |
|
} |
| 593 |
|
if (*pt == '}') |
| 594 |
|
{ |
| 595 |
|
if (c < 0 || count > 8) *errorptr = ERR34; |
| 596 |
|
ptr = pt; |
| 597 |
|
break; |
| 598 |
|
} |
| 599 |
|
/* If the sequence of hex digits does not end with '}', then we don't |
| 600 |
|
recognize this construct; fall through to the normal \x handling. */ |
| 601 |
|
} |
| 602 |
|
#endif |
| 603 |
|
|
| 604 |
|
/* Read just a single hex char */ |
| 605 |
|
|
| 606 |
c = 0; |
c = 0; |
| 607 |
while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) |
while (i++ < 2 && (cd->ctypes[ptr[1]] & ctype_xdigit) != 0) |
| 608 |
{ |
{ |
| 612 |
} |
} |
| 613 |
break; |
break; |
| 614 |
|
|
| 615 |
|
/* Other special escapes not starting with a digit are straightforward */ |
| 616 |
|
|
| 617 |
case 'c': |
case 'c': |
| 618 |
c = *(++ptr); |
c = *(++ptr); |
| 619 |
if (c == 0) |
if (c == 0) |
| 751 |
|
|
| 752 |
Arguments: |
Arguments: |
| 753 |
code points to the start of the pattern (the bracket) |
code points to the start of the pattern (the bracket) |
| 754 |
|
options the compiling options |
| 755 |
|
|
| 756 |
Returns: the fixed length, or -1 if there is no fixed length |
Returns: the fixed length, or -1 if there is no fixed length |
| 757 |
*/ |
*/ |
| 758 |
|
|
| 759 |
static int |
static int |
| 760 |
find_fixedlength(uschar *code) |
find_fixedlength(uschar *code, int options) |
| 761 |
{ |
{ |
| 762 |
int length = -1; |
int length = -1; |
| 763 |
|
|
| 778 |
case OP_BRA: |
case OP_BRA: |
| 779 |
case OP_ONCE: |
case OP_ONCE: |
| 780 |
case OP_COND: |
case OP_COND: |
| 781 |
d = find_fixedlength(cc); |
d = find_fixedlength(cc, options); |
| 782 |
if (d < 0) return -1; |
if (d < 0) return -1; |
| 783 |
branchlength += d; |
branchlength += d; |
| 784 |
do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT); |
do cc += (cc[1] << 8) + cc[2]; while (*cc == OP_ALT); |
| 815 |
|
|
| 816 |
case OP_REVERSE: |
case OP_REVERSE: |
| 817 |
cc++; |
cc++; |
| 818 |
|
/* Fall through */ |
| 819 |
|
|
| 820 |
case OP_CREF: |
case OP_CREF: |
| 821 |
case OP_OPT: |
case OP_OPT: |
| 832 |
cc++; |
cc++; |
| 833 |
break; |
break; |
| 834 |
|
|
| 835 |
/* Handle char strings */ |
/* Handle char strings. In UTF-8 mode we must count characters, not bytes. |
| 836 |
|
This requires a scan of the string, unfortunately. We assume valid UTF-8 |
| 837 |
|
strings, so all we do is reduce the length by one for each byte whose top two bits are |
| 838 |
|
10xxxxxx. */ |
| 839 |
|
|
| 840 |
case OP_CHARS: |
case OP_CHARS: |
| 841 |
branchlength += *(++cc); |
branchlength += *(++cc); |
| 842 |
|
#ifdef SUPPORT_UTF8 |
| 843 |
|
for (d = 1; d <= *cc; d++) |
| 844 |
|
if ((cc[d] & 0xc0) == 0x80) branchlength--; |
| 845 |
|
#endif |
| 846 |
cc += *cc + 1; |
cc += *cc + 1; |
| 847 |
break; |
break; |
| 848 |
|
|
| 906 |
|
|
| 907 |
|
|
| 908 |
/************************************************* |
/************************************************* |
| 909 |
|
* Check for POSIX class syntax * |
| 910 |
|
*************************************************/ |
| 911 |
|
|
| 912 |
|
/* This function is called when the sequence "[:" or "[." or "[=" is |
| 913 |
|
encountered in a character class. It checks whether this is followed by an |
| 914 |
|
optional ^ and then a sequence of letters, terminated by a matching ":]" or |
| 915 |
|
".]" or "=]". |
| 916 |
|
|
| 917 |
|
Arguments: |
| 918 |
|
ptr pointer to the initial [ |
| 919 |
|
endptr where to return the end pointer |
| 920 |
|
cd pointer to compile data |
| 921 |
|
|
| 922 |
|
Returns: TRUE or FALSE |
| 923 |
|
*/ |
| 924 |
|
|
| 925 |
|
static BOOL |
| 926 |
|
check_posix_syntax(const uschar *ptr, const uschar **endptr, compile_data *cd) |
| 927 |
|
{ |
| 928 |
|
int terminator; /* Don't combine these lines; the Solaris cc */ |
| 929 |
|
terminator = *(++ptr); /* compiler warns about "non-constant" initializer. */ |
| 930 |
|
if (*(++ptr) == '^') ptr++; |
| 931 |
|
while ((cd->ctypes[*ptr] & ctype_letter) != 0) ptr++; |
| 932 |
|
if (*ptr == terminator && ptr[1] == ']') |
| 933 |
|
{ |
| 934 |
|
*endptr = ptr; |
| 935 |
|
return TRUE; |
| 936 |
|
} |
| 937 |
|
return FALSE; |
| 938 |
|
} |
| 939 |
|
|
| 940 |
|
|
| 941 |
|
|
| 942 |
|
|
| 943 |
|
/************************************************* |
| 944 |
|
* Check POSIX class name * |
| 945 |
|
*************************************************/ |
| 946 |
|
|
| 947 |
|
/* This function is called to check the name given in a POSIX-style class entry |
| 948 |
|
such as [:alnum:]. |
| 949 |
|
|
| 950 |
|
Arguments: |
| 951 |
|
ptr points to the first letter |
| 952 |
|
len the length of the name |
| 953 |
|
|
| 954 |
|
Returns: a value representing the name, or -1 if unknown |
| 955 |
|
*/ |
| 956 |
|
|
| 957 |
|
static int |
| 958 |
|
check_posix_name(const uschar *ptr, int len) |
| 959 |
|
{ |
| 960 |
|
register int yield = 0; |
| 961 |
|
while (posix_name_lengths[yield] != 0) |
| 962 |
|
{ |
| 963 |
|
if (len == posix_name_lengths[yield] && |
| 964 |
|
strncmp((const char *)ptr, posix_names[yield], len) == 0) return yield; |
| 965 |
|
yield++; |
| 966 |
|
} |
| 967 |
|
return -1; |
| 968 |
|
} |
| 969 |
|
|
| 970 |
|
|
| 971 |
|
|
| 972 |
|
|
| 973 |
|
/************************************************* |
| 974 |
* Compile one branch * |
* Compile one branch * |
| 975 |
*************************************************/ |
*************************************************/ |
| 976 |
|
|
| 983 |
ptrptr points to the current pattern pointer |
ptrptr points to the current pattern pointer |
| 984 |
errorptr points to pointer to error message |
errorptr points to pointer to error message |
| 985 |
optchanged set to the value of the last OP_OPT item compiled |
optchanged set to the value of the last OP_OPT item compiled |
| 986 |
|
reqchar set to the last literal character required, else -1 |
| 987 |
|
countlits set to count of mandatory literal characters |
| 988 |
cd contains pointers to tables |
cd contains pointers to tables |
| 989 |
|
|
| 990 |
Returns: TRUE on success |
Returns: TRUE on success |
| 994 |
static BOOL |
static BOOL |
| 995 |
compile_branch(int options, int *brackets, uschar **codeptr, |
compile_branch(int options, int *brackets, uschar **codeptr, |
| 996 |
const uschar **ptrptr, const char **errorptr, int *optchanged, |
const uschar **ptrptr, const char **errorptr, int *optchanged, |
| 997 |
compile_data *cd) |
int *reqchar, int *countlits, compile_data *cd) |
| 998 |
{ |
{ |
| 999 |
int repeat_type, op_type; |
int repeat_type, op_type; |
| 1000 |
int repeat_min, repeat_max; |
int repeat_min, repeat_max; |
| 1001 |
int bravalue, length; |
int bravalue, length; |
| 1002 |
int greedy_default, greedy_non_default; |
int greedy_default, greedy_non_default; |
| 1003 |
|
int prevreqchar; |
| 1004 |
|
int condcount = 0; |
| 1005 |
|
int subcountlits = 0; |
| 1006 |
register int c; |
register int c; |
| 1007 |
register uschar *code = *codeptr; |
register uschar *code = *codeptr; |
| 1008 |
uschar *tempcode; |
uschar *tempcode; |
| 1016 |
greedy_default = ((options & PCRE_UNGREEDY) != 0); |
greedy_default = ((options & PCRE_UNGREEDY) != 0); |
| 1017 |
greedy_non_default = greedy_default ^ 1; |
greedy_non_default = greedy_default ^ 1; |
| 1018 |
|
|
| 1019 |
|
/* Initialize no required char, and count of literals */ |
| 1020 |
|
|
| 1021 |
|
*reqchar = prevreqchar = -1; |
| 1022 |
|
*countlits = 0; |
| 1023 |
|
|
| 1024 |
/* Switch on next character until the end of the branch */ |
/* Switch on next character until the end of the branch */ |
| 1025 |
|
|
| 1026 |
for (;; ptr++) |
for (;; ptr++) |
| 1030 |
int class_lastchar; |
int class_lastchar; |
| 1031 |
int newoptions; |
int newoptions; |
| 1032 |
int condref; |
int condref; |
| 1033 |
|
int subreqchar; |
| 1034 |
|
|
| 1035 |
c = *ptr; |
c = *ptr; |
| 1036 |
if ((options & PCRE_EXTENDED) != 0) |
if ((options & PCRE_EXTENDED) != 0) |
| 1038 |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
| 1039 |
if (c == '#') |
if (c == '#') |
| 1040 |
{ |
{ |
| 1041 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
/* The space before the ; is to avoid a warning on a silly compiler |
| 1042 |
|
on the Macintosh. */ |
| 1043 |
|
while ((c = *(++ptr)) != 0 && c != '\n') ; |
| 1044 |
continue; |
continue; |
| 1045 |
} |
} |
| 1046 |
} |
} |
| 1115 |
goto FAILED; |
goto FAILED; |
| 1116 |
} |
} |
| 1117 |
|
|
| 1118 |
|
/* Handle POSIX class names. Perl allows a negation extension of the |
| 1119 |
|
form [:^name]. A square bracket that doesn't match the syntax is |
| 1120 |
|
treated as a literal. We also recognize the POSIX constructions |
| 1121 |
|
[.ch.] and [=ch=] ("collating elements") and fault them, as Perl |
| 1122 |
|
5.6 does. */ |
| 1123 |
|
|
| 1124 |
|
if (c == '[' && |
| 1125 |
|
(ptr[1] == ':' || ptr[1] == '.' || ptr[1] == '=') && |
| 1126 |
|
check_posix_syntax(ptr, &tempptr, cd)) |
| 1127 |
|
{ |
| 1128 |
|
BOOL local_negate = FALSE; |
| 1129 |
|
int posix_class, i; |
| 1130 |
|
register const uschar *cbits = cd->cbits; |
| 1131 |
|
|
| 1132 |
|
if (ptr[1] != ':') |
| 1133 |
|
{ |
| 1134 |
|
*errorptr = ERR31; |
| 1135 |
|
goto FAILED; |
| 1136 |
|
} |
| 1137 |
|
|
| 1138 |
|
ptr += 2; |
| 1139 |
|
if (*ptr == '^') |
| 1140 |
|
{ |
| 1141 |
|
local_negate = TRUE; |
| 1142 |
|
ptr++; |
| 1143 |
|
} |
| 1144 |
|
|
| 1145 |
|
posix_class = check_posix_name(ptr, tempptr - ptr); |
| 1146 |
|
if (posix_class < 0) |
| 1147 |
|
{ |
| 1148 |
|
*errorptr = ERR30; |
| 1149 |
|
goto FAILED; |
| 1150 |
|
} |
| 1151 |
|
|
| 1152 |
|
/* If matching is caseless, upper and lower are converted to |
| 1153 |
|
alpha. This relies on the fact that the class table starts with |
| 1154 |
|
alpha, lower, upper as the first 3 entries. */ |
| 1155 |
|
|
| 1156 |
|
if ((options & PCRE_CASELESS) != 0 && posix_class <= 2) |
| 1157 |
|
posix_class = 0; |
| 1158 |
|
|
| 1159 |
|
/* Or into the map we are building up to 3 of the static class |
| 1160 |
|
tables, or their negations. */ |
| 1161 |
|
|
| 1162 |
|
posix_class *= 3; |
| 1163 |
|
for (i = 0; i < 3; i++) |
| 1164 |
|
{ |
| 1165 |
|
int taboffset = posix_class_maps[posix_class + i]; |
| 1166 |
|
if (taboffset < 0) break; |
| 1167 |
|
if (local_negate) |
| 1168 |
|
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset]; |
| 1169 |
|
else |
| 1170 |
|
for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset]; |
| 1171 |
|
} |
| 1172 |
|
|
| 1173 |
|
ptr = tempptr + 1; |
| 1174 |
|
class_charcount = 10; /* Set > 1; assumes more than 1 per class */ |
| 1175 |
|
continue; |
| 1176 |
|
} |
| 1177 |
|
|
| 1178 |
/* Backslash may introduce a single character, or it may introduce one |
/* Backslash may introduce a single character, or it may introduce one |
| 1179 |
of the specials, which just set a flag. Escaped items are checked for |
of the specials, which just set a flag. Escaped items are checked for |
| 1180 |
validity in the pre-compiling pass. The sequence \b is a special case. |
validity in the pre-compiling pass. The sequence \b is a special case. |
| 1202 |
continue; |
continue; |
| 1203 |
|
|
| 1204 |
case ESC_w: |
case ESC_w: |
| 1205 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) class[c] |= cbits[c+cbit_word]; |
|
class[c] |= (cbits[c+cbit_digit] | cbits[c+cbit_word]); |
|
| 1206 |
continue; |
continue; |
| 1207 |
|
|
| 1208 |
case ESC_W: |
case ESC_W: |
| 1209 |
for (c = 0; c < 32; c++) |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+cbit_word]; |
|
class[c] |= ~(cbits[c+cbit_digit] | cbits[c+cbit_word]); |
|
| 1210 |
continue; |
continue; |
| 1211 |
|
|
| 1212 |
case ESC_s: |
case ESC_s: |
| 1222 |
goto FAILED; |
goto FAILED; |
| 1223 |
} |
} |
| 1224 |
} |
} |
| 1225 |
/* Fall through if single character */ |
|
| 1226 |
|
/* Fall through if single character, but don't at present allow |
| 1227 |
|
chars > 255 in UTF-8 mode. */ |
| 1228 |
|
|
| 1229 |
|
#ifdef SUPPORT_UTF8 |
| 1230 |
|
if (c > 255) |
| 1231 |
|
{ |
| 1232 |
|
*errorptr = ERR33; |
| 1233 |
|
goto FAILED; |
| 1234 |
|
} |
| 1235 |
|
#endif |
| 1236 |
} |
} |
| 1237 |
|
|
| 1238 |
/* A single character may be followed by '-' to form a range. However, |
/* A single character may be followed by '-' to form a range. However, |
| 1252 |
} |
} |
| 1253 |
|
|
| 1254 |
/* The second part of a range can be a single-character escape, but |
/* The second part of a range can be a single-character escape, but |
| 1255 |
not any of the other escapes. */ |
not any of the other escapes. Perl 5.6 treats a hyphen as a literal |
| 1256 |
|
in such circumstances. */ |
| 1257 |
|
|
| 1258 |
if (d == '\\') |
if (d == '\\') |
| 1259 |
{ |
{ |
| 1260 |
|
const uschar *oldptr = ptr; |
| 1261 |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
d = check_escape(&ptr, errorptr, *brackets, options, TRUE, cd); |
| 1262 |
|
|
| 1263 |
|
#ifdef SUPPORT_UTF8 |
| 1264 |
|
if (d > 255) |
| 1265 |
|
{ |
| 1266 |
|
*errorptr = ERR33; |
| 1267 |
|
goto FAILED; |
| 1268 |
|
} |
| 1269 |
|
#endif |
| 1270 |
|
/* \b is backslash; any other special means the '-' was literal */ |
| 1271 |
|
|
| 1272 |
if (d < 0) |
if (d < 0) |
| 1273 |
{ |
{ |
| 1274 |
if (d == -ESC_b) d = '\b'; else |
if (d == -ESC_b) d = '\b'; else |
| 1275 |
{ |
{ |
| 1276 |
*errorptr = ERR7; |
ptr = oldptr - 2; |
| 1277 |
goto FAILED; |
goto SINGLE_CHARACTER; /* A few lines below */ |
| 1278 |
} |
} |
| 1279 |
} |
} |
| 1280 |
} |
} |
| 1302 |
/* Handle a lone single character - we can get here for a normal |
/* Handle a lone single character - we can get here for a normal |
| 1303 |
non-escape char, or after \ that introduces a single character. */ |
non-escape char, or after \ that introduces a single character. */ |
| 1304 |
|
|
| 1305 |
|
SINGLE_CHARACTER: |
| 1306 |
|
|
| 1307 |
class [c/8] |= (1 << (c&7)); |
class [c/8] |= (1 << (c&7)); |
| 1308 |
if ((options & PCRE_CASELESS) != 0) |
if ((options & PCRE_CASELESS) != 0) |
| 1309 |
{ |
{ |
| 1388 |
{ repeat_type = greedy_non_default; ptr++; } |
{ repeat_type = greedy_non_default; ptr++; } |
| 1389 |
else repeat_type = greedy_default; |
else repeat_type = greedy_default; |
| 1390 |
|
|
|
/* If the maximum is zero then the minimum must also be zero; Perl allows |
|
|
this case, so we do too - by simply omitting the item altogether. */ |
|
|
|
|
|
if (repeat_max == 0) code = previous; |
|
|
|
|
| 1391 |
/* If previous was a string of characters, chop off the last one and use it |
/* If previous was a string of characters, chop off the last one and use it |
| 1392 |
as the subject of the repeat. If there was only one character, we can |
as the subject of the repeat. If there was only one character, we can |
| 1393 |
abolish the previous item altogether. */ |
abolish the previous item altogether. A repeat with a zero minimum wipes |
| 1394 |
|
out any reqchar setting, backing up to the previous value. We must also |
| 1395 |
|
adjust the countlits value. */ |
| 1396 |
|
|
| 1397 |
else if (*previous == OP_CHARS) |
if (*previous == OP_CHARS) |
| 1398 |
{ |
{ |
| 1399 |
int len = previous[1]; |
int len = previous[1]; |
| 1400 |
|
|
| 1401 |
|
if (repeat_min == 0) *reqchar = prevreqchar; |
| 1402 |
|
*countlits += repeat_min - 1; |
| 1403 |
|
|
| 1404 |
if (len == 1) |
if (len == 1) |
| 1405 |
{ |
{ |
| 1406 |
c = previous[2]; |
c = previous[2]; |
| 1439 |
code = previous; |
code = previous; |
| 1440 |
|
|
| 1441 |
OUTPUT_SINGLE_REPEAT: |
OUTPUT_SINGLE_REPEAT: |
| 1442 |
repeat_type += op_type; /* Combine both values for many cases */ |
|
| 1443 |
|
/* If the maximum is zero then the minimum must also be zero; Perl allows |
| 1444 |
|
this case, so we do too - by simply omitting the item altogether. */ |
| 1445 |
|
|
| 1446 |
|
if (repeat_max == 0) goto END_REPEAT; |
| 1447 |
|
|
| 1448 |
|
/* Combine the op_type with the repeat_type */ |
| 1449 |
|
|
| 1450 |
|
repeat_type += op_type; |
| 1451 |
|
|
| 1452 |
/* A minimum of zero is handled either as the special case * or ?, or as |
/* A minimum of zero is handled either as the special case * or ?, or as |
| 1453 |
an UPTO, with the maximum given. */ |
an UPTO, with the maximum given. */ |
| 1524 |
} |
} |
| 1525 |
|
|
| 1526 |
/* If previous was a character class or a back reference, we put the repeat |
/* If previous was a character class or a back reference, we put the repeat |
| 1527 |
stuff after it. */ |
stuff after it, but just skip the item if the repeat was {0,0}. */ |
| 1528 |
|
|
| 1529 |
else if (*previous == OP_CLASS || *previous == OP_REF) |
else if (*previous == OP_CLASS || *previous == OP_REF) |
| 1530 |
{ |
{ |
| 1531 |
|
if (repeat_max == 0) |
| 1532 |
|
{ |
| 1533 |
|
code = previous; |
| 1534 |
|
goto END_REPEAT; |
| 1535 |
|
} |
| 1536 |
if (repeat_min == 0 && repeat_max == -1) |
if (repeat_min == 0 && repeat_max == -1) |
| 1537 |
*code++ = OP_CRSTAR + repeat_type; |
*code++ = OP_CRSTAR + repeat_type; |
| 1538 |
else if (repeat_min == 1 && repeat_max == -1) |
else if (repeat_min == 1 && repeat_max == -1) |
| 1583 |
|
|
| 1584 |
if (repeat_min == 0) |
if (repeat_min == 0) |
| 1585 |
{ |
{ |
| 1586 |
|
/* If we set up a required char from the bracket, we must back off |
| 1587 |
|
to the previous value and reset the countlits value too. */ |
| 1588 |
|
|
| 1589 |
|
if (subcountlits > 0) |
| 1590 |
|
{ |
| 1591 |
|
*reqchar = prevreqchar; |
| 1592 |
|
*countlits -= subcountlits; |
| 1593 |
|
} |
| 1594 |
|
|
| 1595 |
/* If the maximum is also zero, we just omit the group from the output |
/* If the maximum is also zero, we just omit the group from the output |
| 1596 |
altogether. */ |
altogether. */ |
| 1597 |
|
|
| 1598 |
if (repeat_max == 0) |
if (repeat_max == 0) |
| 1599 |
{ |
{ |
| 1600 |
code = previous; |
code = previous; |
| 1601 |
previous = NULL; |
goto END_REPEAT; |
|
break; |
|
| 1602 |
} |
} |
| 1603 |
|
|
| 1604 |
/* If the maximum is 1 or unlimited, we just have to stick in the |
/* If the maximum is 1 or unlimited, we just have to stick in the |
| 1703 |
correct offset was computed above. */ |
correct offset was computed above. */ |
| 1704 |
|
|
| 1705 |
else code[-ketoffset] = OP_KETRMAX + repeat_type; |
else code[-ketoffset] = OP_KETRMAX + repeat_type; |
|
|
|
|
|
|
|
#ifdef NEVER |
|
|
/* If the minimum is greater than zero, and the maximum is unlimited or |
|
|
equal to the minimum, the first copy remains where it is, and is |
|
|
replicated up to the minimum number of times. This case includes the + |
|
|
repeat, but of course no replication is needed in that case. */ |
|
|
|
|
|
if (repeat_min > 0 && (repeat_max == -1 || repeat_max == repeat_min)) |
|
|
{ |
|
|
for (i = 1; i < repeat_min; i++) |
|
|
{ |
|
|
memcpy(code, previous, len); |
|
|
code += len; |
|
|
} |
|
|
} |
|
|
|
|
|
/* If the minimum is zero, stick BRAZERO in front of the first copy. |
|
|
Then, if there is a fixed upper limit, replicated up to that many times, |
|
|
sticking BRAZERO in front of all the optional ones. */ |
|
|
|
|
|
else |
|
|
{ |
|
|
if (repeat_min == 0) |
|
|
{ |
|
|
memmove(previous+1, previous, len); |
|
|
code++; |
|
|
*previous++ = OP_BRAZERO + repeat_type; |
|
|
} |
|
|
|
|
|
for (i = 1; i < repeat_min; i++) |
|
|
{ |
|
|
memcpy(code, previous, len); |
|
|
code += len; |
|
|
} |
|
|
|
|
|
for (i = (repeat_min > 0)? repeat_min : 1; i < repeat_max; i++) |
|
|
{ |
|
|
*code++ = OP_BRAZERO + repeat_type; |
|
|
memcpy(code, previous, len); |
|
|
code += len; |
|
|
} |
|
|
} |
|
|
|
|
|
/* If the maximum is unlimited, set a repeater in the final copy. We |
|
|
can't just offset backwards from the current code point, because we |
|
|
don't know if there's been an options resetting after the ket. The |
|
|
correct offset was computed above. */ |
|
|
|
|
|
if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type; |
|
|
#endif |
|
|
|
|
|
|
|
| 1706 |
} |
} |
| 1707 |
|
|
| 1708 |
/* Else there's some kind of shambles */ |
/* Else there's some kind of shambles */ |
| 1715 |
|
|
| 1716 |
/* In all cases we no longer have a previous item. */ |
/* In all cases we no longer have a previous item. */ |
| 1717 |
|
|
| 1718 |
|
END_REPEAT: |
| 1719 |
previous = NULL; |
previous = NULL; |
| 1720 |
break; |
break; |
| 1721 |
|
|
| 1793 |
ptr++; |
ptr++; |
| 1794 |
break; |
break; |
| 1795 |
|
|
| 1796 |
|
case 'R': /* Pattern recursion */ |
| 1797 |
|
*code++ = OP_RECURSE; |
| 1798 |
|
ptr++; |
| 1799 |
|
continue; |
| 1800 |
|
|
| 1801 |
default: /* Option setting */ |
default: /* Option setting */ |
| 1802 |
set = unset = 0; |
set = unset = 0; |
| 1803 |
optset = &set; |
optset = &set; |
| 1889 |
(bravalue == OP_ASSERTBACK || |
(bravalue == OP_ASSERTBACK || |
| 1890 |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
bravalue == OP_ASSERTBACK_NOT), /* TRUE if back assert */ |
| 1891 |
condref, /* Condition reference number */ |
condref, /* Condition reference number */ |
| 1892 |
|
&subreqchar, /* For possible last char */ |
| 1893 |
|
&subcountlits, /* For literal count */ |
| 1894 |
cd)) /* Tables block */ |
cd)) /* Tables block */ |
| 1895 |
goto FAILED; |
goto FAILED; |
| 1896 |
|
|
| 1904 |
|
|
| 1905 |
if (bravalue == OP_COND) |
if (bravalue == OP_COND) |
| 1906 |
{ |
{ |
|
int branchcount = 0; |
|
| 1907 |
uschar *tc = code; |
uschar *tc = code; |
| 1908 |
|
condcount = 0; |
| 1909 |
|
|
| 1910 |
do { |
do { |
| 1911 |
branchcount++; |
condcount++; |
| 1912 |
tc += (tc[1] << 8) | tc[2]; |
tc += (tc[1] << 8) | tc[2]; |
| 1913 |
} |
} |
| 1914 |
while (*tc != OP_KET); |
while (*tc != OP_KET); |
| 1915 |
|
|
| 1916 |
if (branchcount > 2) |
if (condcount > 2) |
| 1917 |
{ |
{ |
| 1918 |
*errorptr = ERR27; |
*errorptr = ERR27; |
| 1919 |
goto FAILED; |
goto FAILED; |
| 1920 |
} |
} |
| 1921 |
} |
} |
| 1922 |
|
|
| 1923 |
|
/* Handle updating of the required character. If the subpattern didn't |
| 1924 |
|
set one, leave it as it was. Otherwise, update it for normal brackets of |
| 1925 |
|
all kinds, forward assertions, and conditions with two branches. Don't |
| 1926 |
|
update the literal count for forward assertions, however. If the bracket |
| 1927 |
|
is followed by a quantifier with zero repeat, we have to back off. Hence |
| 1928 |
|
the definition of prevreqchar and subcountlits outside the main loop so |
| 1929 |
|
that they can be accessed for the back off. */ |
| 1930 |
|
|
| 1931 |
|
if (subreqchar > 0 && |
| 1932 |
|
(bravalue >= OP_BRA || bravalue == OP_ONCE || bravalue == OP_ASSERT || |
| 1933 |
|
(bravalue == OP_COND && condcount == 2))) |
| 1934 |
|
{ |
| 1935 |
|
prevreqchar = *reqchar; |
| 1936 |
|
*reqchar = subreqchar; |
| 1937 |
|
if (bravalue != OP_ASSERT) *countlits += subcountlits; |
| 1938 |
|
} |
| 1939 |
|
|
| 1940 |
/* Now update the main code pointer to the end of the group. */ |
/* Now update the main code pointer to the end of the group. */ |
| 1941 |
|
|
| 1942 |
code = tempcode; |
code = tempcode; |
| 2004 |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
if ((cd->ctypes[c] & ctype_space) != 0) continue; |
| 2005 |
if (c == '#') |
if (c == '#') |
| 2006 |
{ |
{ |
| 2007 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
/* The space before the ; is to avoid a warning on a silly compiler |
| 2008 |
|
on the Macintosh. */ |
| 2009 |
|
while ((c = *(++ptr)) != 0 && c != '\n') ; |
| 2010 |
if (c == 0) break; |
if (c == 0) break; |
| 2011 |
continue; |
continue; |
| 2012 |
} |
} |
| 2021 |
tempptr = ptr; |
tempptr = ptr; |
| 2022 |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
c = check_escape(&ptr, errorptr, *brackets, options, FALSE, cd); |
| 2023 |
if (c < 0) { ptr = tempptr; break; } |
if (c < 0) { ptr = tempptr; break; } |
| 2024 |
|
|
| 2025 |
|
/* If a character is > 127 in UTF-8 mode, we have to turn it into |
| 2026 |
|
two or more characters in the UTF-8 encoding. */ |
| 2027 |
|
|
| 2028 |
|
#ifdef SUPPORT_UTF8 |
| 2029 |
|
if (c > 127 && (options & PCRE_UTF8) != 0) |
| 2030 |
|
{ |
| 2031 |
|
uschar buffer[8]; |
| 2032 |
|
int len = ord2utf8(c, buffer); |
| 2033 |
|
for (c = 0; c < len; c++) *code++ = buffer[c]; |
| 2034 |
|
length += len; |
| 2035 |
|
continue; |
| 2036 |
|
} |
| 2037 |
|
#endif |
| 2038 |
} |
} |
| 2039 |
|
|
| 2040 |
/* Ordinary character or single-char escape */ |
/* Ordinary character or single-char escape */ |
| 2045 |
|
|
| 2046 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
| 2047 |
|
|
| 2048 |
while (length < 255 && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); |
while (length < MAXLIT && (cd->ctypes[c = *(++ptr)] & ctype_meta) == 0); |
| 2049 |
|
|
| 2050 |
|
/* Update the last character and the count of literals */ |
| 2051 |
|
|
| 2052 |
|
prevreqchar = (length > 1)? code[-2] : *reqchar; |
| 2053 |
|
*reqchar = code[-1]; |
| 2054 |
|
*countlits += length; |
| 2055 |
|
|
| 2056 |
/* Compute the length and set it in the data vector, and advance to |
/* Compute the length and set it in the data vector, and advance to |
| 2057 |
the next state. */ |
the next state. */ |
| 2058 |
|
|
| 2059 |
previous[1] = length; |
previous[1] = length; |
| 2060 |
if (length < 255) ptr--; |
if (length < MAXLIT) ptr--; |
| 2061 |
break; |
break; |
| 2062 |
} |
} |
| 2063 |
} /* end of big loop */ |
} /* end of big loop */ |
| 2096 |
errorptr -> pointer to error message |
errorptr -> pointer to error message |
| 2097 |
lookbehind TRUE if this is a lookbehind assertion |
lookbehind TRUE if this is a lookbehind assertion |
| 2098 |
condref > 0 for OP_CREF setting at start of conditional group |
condref > 0 for OP_CREF setting at start of conditional group |
| 2099 |
|
reqchar -> place to put the last required character, or a negative number |
| 2100 |
|
countlits -> place to put the shortest literal count of any branch |
| 2101 |
cd points to the data block with tables pointers |
cd points to the data block with tables pointers |
| 2102 |
|
|
| 2103 |
Returns: TRUE on success |
Returns: TRUE on success |
| 2106 |
static BOOL |
static BOOL |
| 2107 |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
compile_regex(int options, int optchanged, int *brackets, uschar **codeptr, |
| 2108 |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, |
const uschar **ptrptr, const char **errorptr, BOOL lookbehind, int condref, |
| 2109 |
compile_data *cd) |
int *reqchar, int *countlits, compile_data *cd) |
| 2110 |
{ |
{ |
| 2111 |
const uschar *ptr = *ptrptr; |
const uschar *ptr = *ptrptr; |
| 2112 |
uschar *code = *codeptr; |
uschar *code = *codeptr; |
| 2114 |
uschar *start_bracket = code; |
uschar *start_bracket = code; |
| 2115 |
uschar *reverse_count = NULL; |
uschar *reverse_count = NULL; |
| 2116 |
int oldoptions = options & PCRE_IMS; |
int oldoptions = options & PCRE_IMS; |
| 2117 |
|
int branchreqchar, branchcountlits; |
| 2118 |
|
|
| 2119 |
|
*reqchar = -1; |
| 2120 |
|
*countlits = INT_MAX; |
| 2121 |
code += 3; |
code += 3; |
| 2122 |
|
|
| 2123 |
/* At the start of a reference-based conditional group, insert the reference |
/* At the start of a reference-based conditional group, insert the reference |
| 2156 |
|
|
| 2157 |
/* Now compile the branch */ |
/* Now compile the branch */ |
| 2158 |
|
|
| 2159 |
if (!compile_branch(options,brackets,&code,&ptr,errorptr,&optchanged,cd)) |
if (!compile_branch(options, brackets, &code, &ptr, errorptr, &optchanged, |
| 2160 |
|
&branchreqchar, &branchcountlits, cd)) |
| 2161 |
{ |
{ |
| 2162 |
*ptrptr = ptr; |
*ptrptr = ptr; |
| 2163 |
return FALSE; |
return FALSE; |
| 2169 |
last_branch[1] = length >> 8; |
last_branch[1] = length >> 8; |
| 2170 |
last_branch[2] = length & 255; |
last_branch[2] = length & 255; |
| 2171 |
|
|
| 2172 |
|
/* Save the last required character if all branches have the same; a current |
| 2173 |
|
value of -1 means unset, while -2 means "previous branch had no last required |
| 2174 |
|
char". */ |
| 2175 |
|
|
| 2176 |
|
if (*reqchar != -2) |
| 2177 |
|
{ |
| 2178 |
|
if (branchreqchar >= 0) |
| 2179 |
|
{ |
| 2180 |
|
if (*reqchar == -1) *reqchar = branchreqchar; |
| 2181 |
|
else if (*reqchar != branchreqchar) *reqchar = -2; |
| 2182 |
|
} |
| 2183 |
|
else *reqchar = -2; |
| 2184 |
|
} |
| 2185 |
|
|
| 2186 |
|
/* Keep the shortest literal count */ |
| 2187 |
|
|
| 2188 |
|
if (branchcountlits < *countlits) *countlits = branchcountlits; |
| 2189 |
|
DPRINTF(("literal count = %d min=%d\n", branchcountlits, *countlits)); |
| 2190 |
|
|
| 2191 |
/* If lookbehind, check that this branch matches a fixed-length string, |
/* If lookbehind, check that this branch matches a fixed-length string, |
| 2192 |
and put the length into the OP_REVERSE item. Temporarily mark the end of |
and put the length into the OP_REVERSE item. Temporarily mark the end of |
| 2193 |
the branch with OP_END. */ |
the branch with OP_END. */ |
| 2195 |
if (lookbehind) |
if (lookbehind) |
| 2196 |
{ |
{ |
| 2197 |
*code = OP_END; |
*code = OP_END; |
| 2198 |
length = find_fixedlength(last_branch); |
length = find_fixedlength(last_branch, options); |
| 2199 |
DPRINTF(("fixed length = %d\n", length)); |
DPRINTF(("fixed length = %d\n", length)); |
| 2200 |
if (length < 0) |
if (length < 0) |
| 2201 |
{ |
{ |
| 2282 |
code += 2; |
code += 2; |
| 2283 |
break; |
break; |
| 2284 |
|
|
| 2285 |
|
case OP_WORD_BOUNDARY: |
| 2286 |
|
case OP_NOT_WORD_BOUNDARY: |
| 2287 |
|
code++; |
| 2288 |
|
break; |
| 2289 |
|
|
| 2290 |
case OP_ASSERT_NOT: |
case OP_ASSERT_NOT: |
| 2291 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
| 2292 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
| 2314 |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
it's anchored. However, if this is a multiline pattern, then only OP_SOD |
| 2315 |
counts, since OP_CIRC can match in the middle. |
counts, since OP_CIRC can match in the middle. |
| 2316 |
|
|
| 2317 |
A branch is also implicitly anchored if it starts with .* because that will try |
A branch is also implicitly anchored if it starts with .* and DOTALL is set, |
| 2318 |
the rest of the pattern at all possible matching points, so there is no point |
because that will try the rest of the pattern at all possible matching points, |
| 2319 |
trying them again. |
so there is no point trying them again. |
| 2320 |
|
|
| 2321 |
Arguments: |
Arguments: |
| 2322 |
code points to start of expression (the bracket) |
code points to start of expression (the bracket) |
| 2334 |
register int op = *scode; |
register int op = *scode; |
| 2335 |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
| 2336 |
{ if (!is_anchored(scode, options)) return FALSE; } |
{ if (!is_anchored(scode, options)) return FALSE; } |
| 2337 |
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) |
else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) && |
| 2338 |
|
(*options & PCRE_DOTALL) != 0) |
| 2339 |
{ if (scode[1] != OP_ANY) return FALSE; } |
{ if (scode[1] != OP_ANY) return FALSE; } |
| 2340 |
else if (op != OP_SOD && |
else if (op != OP_SOD && |
| 2341 |
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC)) |
| 2349 |
|
|
| 2350 |
|
|
| 2351 |
/************************************************* |
/************************************************* |
| 2352 |
* Check for start with \n line expression * |
* Check for starting with ^ or .* * |
| 2353 |
*************************************************/ |
*************************************************/ |
| 2354 |
|
|
| 2355 |
/* This is called for multiline expressions to try to find out if every branch |
/* This is called to find out if every branch starts with ^ or .* so that |
| 2356 |
starts with ^ so that "first char" processing can be done to speed things up. |
"first char" processing can be done to speed things up in multiline |
| 2357 |
|
matching and for non-DOTALL patterns that start with .* (which must start at |
| 2358 |
|
the beginning or after \n). |
| 2359 |
|
|
| 2360 |
Argument: points to start of expression (the bracket) |
Argument: points to start of expression (the bracket) |
| 2361 |
Returns: TRUE or FALSE |
Returns: TRUE or FALSE |
| 2369 |
register int op = *scode; |
register int op = *scode; |
| 2370 |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND) |
| 2371 |
{ if (!is_startline(scode)) return FALSE; } |
{ if (!is_startline(scode)) return FALSE; } |
| 2372 |
|
else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR) |
| 2373 |
|
{ if (scode[1] != OP_ANY) return FALSE; } |
| 2374 |
else if (op != OP_CIRC) return FALSE; |
else if (op != OP_CIRC) return FALSE; |
| 2375 |
code += (code[1] << 8) + code[2]; |
code += (code[1] << 8) + code[2]; |
| 2376 |
} |
} |
| 2469 |
real_pcre *re; |
real_pcre *re; |
| 2470 |
int length = 3; /* For initial BRA plus length */ |
int length = 3; /* For initial BRA plus length */ |
| 2471 |
int runlength; |
int runlength; |
| 2472 |
int c, size; |
int c, reqchar, countlits; |
| 2473 |
int bracount = 0; |
int bracount = 0; |
| 2474 |
int top_backref = 0; |
int top_backref = 0; |
| 2475 |
int branch_extra = 0; |
int branch_extra = 0; |
| 2476 |
int branch_newextra; |
int branch_newextra; |
| 2477 |
unsigned int brastackptr = 0; |
unsigned int brastackptr = 0; |
| 2478 |
|
size_t size; |
| 2479 |
uschar *code; |
uschar *code; |
| 2480 |
const uschar *ptr; |
const uschar *ptr; |
| 2481 |
compile_data compile_block; |
compile_data compile_block; |
| 2486 |
uschar *code_base, *code_end; |
uschar *code_base, *code_end; |
| 2487 |
#endif |
#endif |
| 2488 |
|
|
| 2489 |
|
/* Can't support UTF8 unless PCRE has been compiled to include the code. */ |
| 2490 |
|
|
| 2491 |
|
#ifndef SUPPORT_UTF8 |
| 2492 |
|
if ((options & PCRE_UTF8) != 0) |
| 2493 |
|
{ |
| 2494 |
|
*errorptr = ERR32; |
| 2495 |
|
return NULL; |
| 2496 |
|
} |
| 2497 |
|
#endif |
| 2498 |
|
|
| 2499 |
/* We can't pass back an error message if errorptr is NULL; I guess the best we |
/* We can't pass back an error message if errorptr is NULL; I guess the best we |
| 2500 |
can do is just return NULL. */ |
can do is just return NULL. */ |
| 2501 |
|
|
| 2548 |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
| 2549 |
if (c == '#') |
if (c == '#') |
| 2550 |
{ |
{ |
| 2551 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
/* The space before the ; is to avoid a warning on a silly compiler |
| 2552 |
|
on the Macintosh. */ |
| 2553 |
|
while ((c = *(++ptr)) != 0 && c != '\n') ; |
| 2554 |
continue; |
continue; |
| 2555 |
} |
} |
| 2556 |
} |
} |
| 2715 |
ptr += 2; |
ptr += 2; |
| 2716 |
break; |
break; |
| 2717 |
|
|
| 2718 |
|
/* A recursive call to the regex is an extension, to provide the |
| 2719 |
|
facility which can be obtained by $(?p{perl-code}) in Perl 5.6. */ |
| 2720 |
|
|
| 2721 |
|
case 'R': |
| 2722 |
|
if (ptr[3] != ')') |
| 2723 |
|
{ |
| 2724 |
|
*errorptr = ERR29; |
| 2725 |
|
goto PCRE_ERROR_RETURN; |
| 2726 |
|
} |
| 2727 |
|
ptr += 3; |
| 2728 |
|
length += 1; |
| 2729 |
|
break; |
| 2730 |
|
|
| 2731 |
/* Lookbehinds are in Perl from version 5.005 */ |
/* Lookbehinds are in Perl from version 5.005 */ |
| 2732 |
|
|
| 2733 |
case '<': |
case '<': |
| 2760 |
else /* An assertion must follow */ |
else /* An assertion must follow */ |
| 2761 |
{ |
{ |
| 2762 |
ptr++; /* Can treat like ':' as far as spacing is concerned */ |
ptr++; /* Can treat like ':' as far as spacing is concerned */ |
| 2763 |
|
if (ptr[2] != '?' || |
| 2764 |
if (ptr[2] != '?' || strchr("=!<", ptr[3]) == NULL) |
(ptr[3] != '=' && ptr[3] != '!' && ptr[3] != '<') ) |
| 2765 |
{ |
{ |
| 2766 |
ptr += 2; /* To get right offset in message */ |
ptr += 2; /* To get right offset in message */ |
| 2767 |
*errorptr = ERR28; |
*errorptr = ERR28; |
| 2835 |
will lead to an over-estimate on the length, but this shouldn't |
will lead to an over-estimate on the length, but this shouldn't |
| 2836 |
matter very much. We also have to allow for resetting options at |
matter very much. We also have to allow for resetting options at |
| 2837 |
the start of any alternations, which we do by setting |
the start of any alternations, which we do by setting |
| 2838 |
branch_newextra to 2. */ |
branch_newextra to 2. Finally, we record whether the case-dependent |
| 2839 |
|
flag ever changes within the regex. This is used by the "required |
| 2840 |
|
character" code. */ |
| 2841 |
|
|
| 2842 |
case ':': |
case ':': |
| 2843 |
if (((set|unset) & PCRE_IMS) != 0) |
if (((set|unset) & PCRE_IMS) != 0) |
| 2844 |
{ |
{ |
| 2845 |
length += 4; |
length += 4; |
| 2846 |
branch_newextra = 2; |
branch_newextra = 2; |
| 2847 |
|
if (((set|unset) & PCRE_CASELESS) != 0) options |= PCRE_ICHANGED; |
| 2848 |
} |
} |
| 2849 |
goto END_OPTIONS; |
goto END_OPTIONS; |
| 2850 |
|
|
| 2974 |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
if ((compile_block.ctypes[c] & ctype_space) != 0) continue; |
| 2975 |
if (c == '#') |
if (c == '#') |
| 2976 |
{ |
{ |
| 2977 |
while ((c = *(++ptr)) != 0 && c != '\n'); |
/* The space before the ; is to avoid a warning on a silly compiler |
| 2978 |
|
on the Macintosh. */ |
| 2979 |
|
while ((c = *(++ptr)) != 0 && c != '\n') ; |
| 2980 |
continue; |
continue; |
| 2981 |
} |
} |
| 2982 |
} |
} |
| 2991 |
&compile_block); |
&compile_block); |
| 2992 |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
if (*errorptr != NULL) goto PCRE_ERROR_RETURN; |
| 2993 |
if (c < 0) { ptr = saveptr; break; } |
if (c < 0) { ptr = saveptr; break; } |
| 2994 |
|
|
| 2995 |
|
#ifdef SUPPORT_UTF8 |
| 2996 |
|
if (c > 127 && (options & PCRE_UTF8) != 0) |
| 2997 |
|
{ |
| 2998 |
|
int i; |
| 2999 |
|
for (i = 0; i < sizeof(utf8_table1)/sizeof(int); i++) |
| 3000 |
|
if (c <= utf8_table1[i]) break; |
| 3001 |
|
runlength += i; |
| 3002 |
|
} |
| 3003 |
|
#endif |
| 3004 |
} |
} |
| 3005 |
|
|
| 3006 |
/* Ordinary character or single-char escape */ |
/* Ordinary character or single-char escape */ |
| 3010 |
|
|
| 3011 |
/* This "while" is the end of the "do" above. */ |
/* This "while" is the end of the "do" above. */ |
| 3012 |
|
|
| 3013 |
while (runlength < 255 && |
while (runlength < MAXLIT && |
| 3014 |
(compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); |
(compile_block.ctypes[c = *(++ptr)] & ctype_meta) == 0); |
| 3015 |
|
|
| 3016 |
ptr--; |
ptr--; |
| 3042 |
return NULL; |
return NULL; |
| 3043 |
} |
} |
| 3044 |
|
|
| 3045 |
/* Put in the magic number and the options. */ |
/* Put in the magic number, and save the size, options, and table pointer */ |
| 3046 |
|
|
| 3047 |
re->magic_number = MAGIC_NUMBER; |
re->magic_number = MAGIC_NUMBER; |
| 3048 |
|
re->size = size; |
| 3049 |
re->options = options; |
re->options = options; |
| 3050 |
re->tables = tables; |
re->tables = tables; |
| 3051 |
|
|
| 3058 |
*code = OP_BRA; |
*code = OP_BRA; |
| 3059 |
bracount = 0; |
bracount = 0; |
| 3060 |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, |
(void)compile_regex(options, -1, &bracount, &code, &ptr, errorptr, FALSE, -1, |
| 3061 |
&compile_block); |
&reqchar, &countlits, &compile_block); |
| 3062 |
re->top_bracket = bracount; |
re->top_bracket = bracount; |
| 3063 |
re->top_backref = top_backref; |
re->top_backref = top_backref; |
| 3064 |
|
|
| 3090 |
return NULL; |
return NULL; |
| 3091 |
} |
} |
| 3092 |
|
|
| 3093 |
/* If the anchored option was not passed, set flag if we can determine that it |
/* If the anchored option was not passed, set flag if we can determine that the |
| 3094 |
is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if |
pattern is anchored by virtue of ^ characters or \A or anything else (such as |
| 3095 |
we can determine what the first character has to be, because that speeds up |
starting with .* when DOTALL is set). |
| 3096 |
unanchored matches no end. In the case of multiline matches, an alternative is |
|
| 3097 |
to set the PCRE_STARTLINE flag if all branches start with ^. */ |
Otherwise, see if we can determine what the first character has to be, because |
| 3098 |
|
that speeds up unanchored matches no end. If not, see if we can set the |
| 3099 |
|
PCRE_STARTLINE flag. This is helpful for multiline matches when all branches |
| 3100 |
|
start with ^, and also when all branches start with .* for non-DOTALL matches. |
| 3101 |
|
*/ |
| 3102 |
|
|
| 3103 |
if ((options & PCRE_ANCHORED) == 0) |
if ((options & PCRE_ANCHORED) == 0) |
| 3104 |
{ |
{ |
| 3118 |
} |
} |
| 3119 |
} |
} |
| 3120 |
|
|
| 3121 |
|
/* Save the last required character if there are at least two literal |
| 3122 |
|
characters on all paths, or if there is no first character setting. */ |
| 3123 |
|
|
| 3124 |
|
if (reqchar >= 0 && (countlits > 1 || (re->options & PCRE_FIRSTSET) == 0)) |
| 3125 |
|
{ |
| 3126 |
|
re->req_char = reqchar; |
| 3127 |
|
re->options |= PCRE_REQCHSET; |
| 3128 |
|
} |
| 3129 |
|
|
| 3130 |
/* Print out the compiled data for debugging */ |
/* Print out the compiled data for debugging */ |
| 3131 |
|
|
| 3132 |
#ifdef DEBUG |
#ifdef DEBUG |
| 3136 |
|
|
| 3137 |
if (re->options != 0) |
if (re->options != 0) |
| 3138 |
{ |
{ |
| 3139 |
printf("%s%s%s%s%s%s%s%s\n", |
printf("%s%s%s%s%s%s%s%s%s\n", |
| 3140 |
((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", |
((re->options & PCRE_ANCHORED) != 0)? "anchored " : "", |
| 3141 |
((re->options & PCRE_CASELESS) != 0)? "caseless " : "", |
((re->options & PCRE_CASELESS) != 0)? "caseless " : "", |
| 3142 |
|
((re->options & PCRE_ICHANGED) != 0)? "case state changed " : "", |
| 3143 |
((re->options & PCRE_EXTENDED) != 0)? "extended " : "", |
((re->options & PCRE_EXTENDED) != 0)? "extended " : "", |
| 3144 |
((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", |
((re->options & PCRE_MULTILINE) != 0)? "multiline " : "", |
| 3145 |
((re->options & PCRE_DOTALL) != 0)? "dotall " : "", |
((re->options & PCRE_DOTALL) != 0)? "dotall " : "", |
| 3154 |
else printf("First char = \\x%02x\n", re->first_char); |
else printf("First char = \\x%02x\n", re->first_char); |
| 3155 |
} |
} |
| 3156 |
|
|
| 3157 |
|
if ((re->options & PCRE_REQCHSET) != 0) |
| 3158 |
|
{ |
| 3159 |
|
if (isprint(re->req_char)) printf("Req char = %c\n", re->req_char); |
| 3160 |
|
else printf("Req char = \\x%02x\n", re->req_char); |
| 3161 |
|
} |
| 3162 |
|
|
| 3163 |
code_end = code; |
code_end = code; |
| 3164 |
code_base = code = re->code; |
code_base = code = re->code; |
| 3165 |
|
|
| 3393 |
|
|
| 3394 |
static BOOL |
static BOOL |
| 3395 |
match_ref(int offset, register const uschar *eptr, int length, match_data *md, |
match_ref(int offset, register const uschar *eptr, int length, match_data *md, |
| 3396 |
int ims) |
unsigned long int ims) |
| 3397 |
{ |
{ |
| 3398 |
const uschar *p = md->start_subject + md->offset_vector[offset]; |
const uschar *p = md->start_subject + md->offset_vector[offset]; |
| 3399 |
|
|
| 3444 |
offset_top current top pointer |
offset_top current top pointer |
| 3445 |
md pointer to "static" info for the match |
md pointer to "static" info for the match |
| 3446 |
ims current /i, /m, and /s options |
ims current /i, /m, and /s options |
| 3447 |
condassert TRUE if called to check a condition assertion |
eptrb pointer to chain of blocks containing eptr at start of |
| 3448 |
eptrb eptr at start of last bracket |
brackets - for testing for empty matches |
| 3449 |
|
flags can contain |
| 3450 |
|
match_condassert - this is an assertion condition |
| 3451 |
|
match_isgroup - this is the start of a bracketed group |
| 3452 |
|
|
| 3453 |
Returns: TRUE if matched |
Returns: TRUE if matched |
| 3454 |
*/ |
*/ |
| 3455 |
|
|
| 3456 |
static BOOL |
static BOOL |
| 3457 |
match(register const uschar *eptr, register const uschar *ecode, |
match(register const uschar *eptr, register const uschar *ecode, |
| 3458 |
int offset_top, match_data *md, int ims, BOOL condassert, const uschar *eptrb) |
int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, |
| 3459 |
|
int flags) |
| 3460 |
{ |
{ |
| 3461 |
int original_ims = ims; /* Save for resetting on ')' */ |
unsigned long int original_ims = ims; /* Save for resetting on ')' */ |
| 3462 |
|
eptrblock newptrb; |
| 3463 |
|
|
| 3464 |
|
/* At the start of a bracketed group, add the current subject pointer to the |
| 3465 |
|
stack of such pointers, to be re-instated at the end of the group when we hit |
| 3466 |
|
the closing ket. When match() is called in other circumstances, we don't add to |
| 3467 |
|
the stack. */ |
| 3468 |
|
|
| 3469 |
|
if ((flags & match_isgroup) != 0) |
| 3470 |
|
{ |
| 3471 |
|
newptrb.prev = eptrb; |
| 3472 |
|
newptrb.saved_eptr = eptr; |
| 3473 |
|
eptrb = &newptrb; |
| 3474 |
|
} |
| 3475 |
|
|
| 3476 |
|
/* Now start processing the operations. */ |
| 3477 |
|
|
| 3478 |
for (;;) |
for (;;) |
| 3479 |
{ |
{ |
| 3519 |
|
|
| 3520 |
do |
do |
| 3521 |
{ |
{ |
| 3522 |
if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE; |
if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup)) |
| 3523 |
|
return TRUE; |
| 3524 |
ecode += (ecode[1] << 8) + ecode[2]; |
ecode += (ecode[1] << 8) + ecode[2]; |
| 3525 |
} |
} |
| 3526 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 3546 |
DPRINTF(("start bracket 0\n")); |
DPRINTF(("start bracket 0\n")); |
| 3547 |
do |
do |
| 3548 |
{ |
{ |
| 3549 |
if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE; |
if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup)) |
| 3550 |
|
return TRUE; |
| 3551 |
ecode += (ecode[1] << 8) + ecode[2]; |
ecode += (ecode[1] << 8) + ecode[2]; |
| 3552 |
} |
} |
| 3553 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 3566 |
return match(eptr, |
return match(eptr, |
| 3567 |
ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)? |
ecode + ((offset < offset_top && md->offset_vector[offset] >= 0)? |
| 3568 |
5 : 3 + (ecode[1] << 8) + ecode[2]), |
5 : 3 + (ecode[1] << 8) + ecode[2]), |
| 3569 |
offset_top, md, ims, FALSE, eptr); |
offset_top, md, ims, eptrb, match_isgroup); |
| 3570 |
} |
} |
| 3571 |
|
|
| 3572 |
/* The condition is an assertion. Call match() to evaluate it - setting |
/* The condition is an assertion. Call match() to evaluate it - setting |
| 3574 |
|
|
| 3575 |
else |
else |
| 3576 |
{ |
{ |
| 3577 |
if (match(eptr, ecode+3, offset_top, md, ims, TRUE, NULL)) |
if (match(eptr, ecode+3, offset_top, md, ims, NULL, |
| 3578 |
|
match_condassert | match_isgroup)) |
| 3579 |
{ |
{ |
| 3580 |
ecode += 3 + (ecode[4] << 8) + ecode[5]; |
ecode += 3 + (ecode[4] << 8) + ecode[5]; |
| 3581 |
while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2]; |
while (*ecode == OP_ALT) ecode += (ecode[1] << 8) + ecode[2]; |
| 3582 |
} |
} |
| 3583 |
else ecode += (ecode[1] << 8) + ecode[2]; |
else ecode += (ecode[1] << 8) + ecode[2]; |
| 3584 |
return match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr); |
return match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup); |
| 3585 |
} |
} |
| 3586 |
/* Control never reaches here */ |
/* Control never reaches here */ |
| 3587 |
|
|
| 3591 |
ecode += 2; |
ecode += 2; |
| 3592 |
break; |
break; |
| 3593 |
|
|
| 3594 |
/* End of the pattern */ |
/* End of the pattern. If PCRE_NOTEMPTY is set, fail if we have matched |
| 3595 |
|
an empty string - recursion will then try other alternatives, if any. */ |
| 3596 |
|
|
| 3597 |
case OP_END: |
case OP_END: |
| 3598 |
|
if (md->notempty && eptr == md->start_match) return FALSE; |
| 3599 |
md->end_match_ptr = eptr; /* Record where we ended */ |
md->end_match_ptr = eptr; /* Record where we ended */ |
| 3600 |
md->end_offset_top = offset_top; /* and how many extracts were taken */ |
md->end_offset_top = offset_top; /* and how many extracts were taken */ |
| 3601 |
return TRUE; |
return TRUE; |
| 3605 |
case OP_OPT: |
case OP_OPT: |
| 3606 |
ims = ecode[1]; |
ims = ecode[1]; |
| 3607 |
ecode += 2; |
ecode += 2; |
| 3608 |
DPRINTF(("ims set to %02x\n", ims)); |
DPRINTF(("ims set to %02lx\n", ims)); |
| 3609 |
break; |
break; |
| 3610 |
|
|
| 3611 |
/* Assertion brackets. Check the alternative branches in turn - the |
/* Assertion brackets. Check the alternative branches in turn - the |
| 3618 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
| 3619 |
do |
do |
| 3620 |
{ |
{ |
| 3621 |
if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) break; |
if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup)) break; |
| 3622 |
ecode += (ecode[1] << 8) + ecode[2]; |
ecode += (ecode[1] << 8) + ecode[2]; |
| 3623 |
} |
} |
| 3624 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 3626 |
|
|
| 3627 |
/* If checking an assertion for a condition, return TRUE. */ |
/* If checking an assertion for a condition, return TRUE. */ |
| 3628 |
|
|
| 3629 |
if (condassert) return TRUE; |
if ((flags & match_condassert) != 0) return TRUE; |
| 3630 |
|
|
| 3631 |
/* Continue from after the assertion, updating the offsets high water |
/* Continue from after the assertion, updating the offsets high water |
| 3632 |
mark, since extracts may have been taken during the assertion. */ |
mark, since extracts may have been taken during the assertion. */ |
| 3642 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
| 3643 |
do |
do |
| 3644 |
{ |
{ |
| 3645 |
if (match(eptr, ecode+3, offset_top, md, ims, FALSE, NULL)) return FALSE; |
if (match(eptr, ecode+3, offset_top, md, ims, NULL, match_isgroup)) |
| 3646 |
|
return FALSE; |
| 3647 |
ecode += (ecode[1] << 8) + ecode[2]; |
ecode += (ecode[1] << 8) + ecode[2]; |
| 3648 |
} |
} |
| 3649 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 3650 |
|
|
| 3651 |
if (condassert) return TRUE; |
if ((flags & match_condassert) != 0) return TRUE; |
| 3652 |
|
|
| 3653 |
ecode += 3; |
ecode += 3; |
| 3654 |
continue; |
continue; |
| 3655 |
|
|
| 3656 |
/* Move the subject pointer back. This occurs only at the start of |
/* Move the subject pointer back. This occurs only at the start of |
| 3657 |
each branch of a lookbehind assertion. If we are too close to the start to |
each branch of a lookbehind assertion. If we are too close to the start to |
| 3658 |
move back, this match function fails. */ |
move back, this match function fails. When working with UTF-8 we move |
| 3659 |
|
back a number of characters, not bytes. */ |
| 3660 |
|
|
| 3661 |
case OP_REVERSE: |
case OP_REVERSE: |
| 3662 |
|
#ifdef SUPPORT_UTF8 |
| 3663 |
|
c = (ecode[1] << 8) + ecode[2]; |
| 3664 |
|
for (i = 0; i < c; i++) |
| 3665 |
|
{ |
| 3666 |
|
eptr--; |
| 3667 |
|
BACKCHAR(eptr) |
| 3668 |
|
} |
| 3669 |
|
#else |
| 3670 |
eptr -= (ecode[1] << 8) + ecode[2]; |
eptr -= (ecode[1] << 8) + ecode[2]; |
| 3671 |
|
#endif |
| 3672 |
|
|
| 3673 |
if (eptr < md->start_subject) return FALSE; |
if (eptr < md->start_subject) return FALSE; |
| 3674 |
ecode += 3; |
ecode += 3; |
| 3675 |
break; |
break; |
| 3676 |
|
|
| 3677 |
|
/* Recursion matches the current regex, nested. If there are any capturing |
| 3678 |
|
brackets started but not finished, we have to save their starting points |
| 3679 |
|
and reinstate them after the recursion. However, we don't know how many |
| 3680 |
|
such there are (offset_top records the completed total) so we just have |
| 3681 |
|
to save all the potential data. There may be up to 99 such values, which |
| 3682 |
|
is a bit large to put on the stack, but using malloc for small numbers |
| 3683 |
|
seems expensive. As a compromise, the stack is used when there are fewer |
| 3684 |
|
than 16 values to store; otherwise malloc is used. A problem is what to do |
| 3685 |
|
if the malloc fails ... there is no way of returning to the top level with |
| 3686 |
|
an error. Save the top 15 values on the stack, and accept that the rest |
| 3687 |
|
may be wrong. */ |
| 3688 |
|
|
| 3689 |
|
case OP_RECURSE: |
| 3690 |
|
{ |
| 3691 |
|
BOOL rc; |
| 3692 |
|
int *save; |
| 3693 |
|
int stacksave[15]; |
| 3694 |
|
|
| 3695 |
|
c = md->offset_max; |
| 3696 |
|
|
| 3697 |
|
if (c < 16) save = stacksave; else |
| 3698 |
|
{ |
| 3699 |
|
save = (int *)(pcre_malloc)((c+1) * sizeof(int)); |
| 3700 |
|
if (save == NULL) |
| 3701 |
|
{ |
| 3702 |
|
save = stacksave; |
| 3703 |
|
c = 15; |
| 3704 |
|
} |
| 3705 |
|
} |
| 3706 |
|
|
| 3707 |
|
for (i = 1; i <= c; i++) |
| 3708 |
|
save[i] = md->offset_vector[md->offset_end - i]; |
| 3709 |
|
rc = match(eptr, md->start_pattern, offset_top, md, ims, eptrb, |
| 3710 |
|
match_isgroup); |
| 3711 |
|
for (i = 1; i <= c; i++) |
| 3712 |
|
md->offset_vector[md->offset_end - i] = save[i]; |
| 3713 |
|
if (save != stacksave) (pcre_free)(save); |
| 3714 |
|
if (!rc) return FALSE; |
| 3715 |
|
|
| 3716 |
|
/* In case the recursion has set more capturing values, save the final |
| 3717 |
|
number, then move along the subject till after the recursive match, |
| 3718 |
|
and advance one byte in the pattern code. */ |
| 3719 |
|
|
| 3720 |
|
offset_top = md->end_offset_top; |
| 3721 |
|
eptr = md->end_match_ptr; |
| 3722 |
|
ecode++; |
| 3723 |
|
} |
| 3724 |
|
break; |
| 3725 |
|
|
| 3726 |
/* "Once" brackets are like assertion brackets except that after a match, |
/* "Once" brackets are like assertion brackets except that after a match, |
| 3727 |
the point in the subject string is not moved back. Thus there can never be |
the point in the subject string is not moved back. Thus there can never be |
| 3733 |
case OP_ONCE: |
case OP_ONCE: |
| 3734 |
{ |
{ |
| 3735 |
const uschar *prev = ecode; |
const uschar *prev = ecode; |
| 3736 |
|
const uschar *saved_eptr = eptr; |
| 3737 |
|
|
| 3738 |
do |
do |
| 3739 |
{ |
{ |
| 3740 |
if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) break; |
if (match(eptr, ecode+3, offset_top, md, ims, eptrb, match_isgroup)) |
| 3741 |
|
break; |
| 3742 |
ecode += (ecode[1] << 8) + ecode[2]; |
ecode += (ecode[1] << 8) + ecode[2]; |
| 3743 |
} |
} |
| 3744 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 3761 |
5.005. If there is an options reset, it will get obeyed in the normal |
5.005. If there is an options reset, it will get obeyed in the normal |
| 3762 |
course of events. */ |
course of events. */ |
| 3763 |
|
|
| 3764 |
if (*ecode == OP_KET || eptr == eptrb) |
if (*ecode == OP_KET || eptr == saved_eptr) |
| 3765 |
{ |
{ |
| 3766 |
ecode += 3; |
ecode += 3; |
| 3767 |
break; |
break; |
| 3775 |
if (ecode[3] == OP_OPT) |
if (ecode[3] == OP_OPT) |
| 3776 |
{ |
{ |
| 3777 |
ims = (ims & ~PCRE_IMS) | ecode[4]; |
ims = (ims & ~PCRE_IMS) | ecode[4]; |
| 3778 |
DPRINTF(("ims set to %02x at group repeat\n", ims)); |
DPRINTF(("ims set to %02lx at group repeat\n", ims)); |
| 3779 |
} |
} |
| 3780 |
|
|
| 3781 |
if (*ecode == OP_KETRMIN) |
if (*ecode == OP_KETRMIN) |
| 3782 |
{ |
{ |
| 3783 |
if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) || |
if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) || |
| 3784 |
match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE; |
match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup)) |
| 3785 |
|
return TRUE; |
| 3786 |
} |
} |
| 3787 |
else /* OP_KETRMAX */ |
else /* OP_KETRMAX */ |
| 3788 |
{ |
{ |
| 3789 |
if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) || |
if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) || |
| 3790 |
match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE; |
match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE; |
| 3791 |
} |
} |
| 3792 |
} |
} |
| 3793 |
return FALSE; |
return FALSE; |
| 3808 |
case OP_BRAZERO: |
case OP_BRAZERO: |
| 3809 |
{ |
{ |
| 3810 |
const uschar *next = ecode+1; |
const uschar *next = ecode+1; |
| 3811 |
if (match(eptr, next, offset_top, md, ims, FALSE, eptr)) return TRUE; |
if (match(eptr, next, offset_top, md, ims, eptrb, match_isgroup)) |
| 3812 |
|
return TRUE; |
| 3813 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
| 3814 |
ecode = next + 3; |
ecode = next + 3; |
| 3815 |
} |
} |
| 3819 |
{ |
{ |
| 3820 |
const uschar *next = ecode+1; |
const uschar *next = ecode+1; |
| 3821 |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
do next += (next[1] << 8) + next[2]; while (*next == OP_ALT); |
| 3822 |
if (match(eptr, next+3, offset_top, md, ims, FALSE, eptr)) return TRUE; |
if (match(eptr, next+3, offset_top, md, ims, eptrb, match_isgroup)) |
| 3823 |
|
return TRUE; |
| 3824 |
ecode++; |
ecode++; |
| 3825 |
} |
} |
| 3826 |
break; |
break; |
| 3835 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 3836 |
{ |
{ |
| 3837 |
const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
const uschar *prev = ecode - (ecode[1] << 8) - ecode[2]; |
| 3838 |
|
const uschar *saved_eptr = eptrb->saved_eptr; |
| 3839 |
|
|
| 3840 |
|
eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */ |
| 3841 |
|
|
| 3842 |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || |
| 3843 |
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || |
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || |
| 3857 |
int number = *prev - OP_BRA; |
int number = *prev - OP_BRA; |
| 3858 |
int offset = number << 1; |
int offset = number << 1; |
| 3859 |
|
|
| 3860 |
DPRINTF(("end bracket %d\n", number)); |
#ifdef DEBUG |
| 3861 |
|
printf("end bracket %d", number); |
| 3862 |
|
printf("\n"); |
| 3863 |
|
#endif |
| 3864 |
|
|
| 3865 |
if (number > 0) |
if (number > 0) |
| 3866 |
{ |
{ |
| 3878 |
the group. */ |
the group. */ |
| 3879 |
|
|
| 3880 |
ims = original_ims; |
ims = original_ims; |
| 3881 |
DPRINTF(("ims reset to %02x\n", ims)); |
DPRINTF(("ims reset to %02lx\n", ims)); |
| 3882 |
|
|
| 3883 |
/* For a non-repeating ket, just continue at this level. This also |
/* For a non-repeating ket, just continue at this level. This also |
| 3884 |
happens for a repeating ket if no characters were matched in the group. |
happens for a repeating ket if no characters were matched in the group. |
| 3886 |
5.005. If there is an options reset, it will get obeyed in the normal |
5.005. If there is an options reset, it will get obeyed in the normal |
| 3887 |
course of events. */ |
course of events. */ |
| 3888 |
|
|
| 3889 |
if (*ecode == OP_KET || eptr == eptrb) |
if (*ecode == OP_KET || eptr == saved_eptr) |
| 3890 |
{ |
{ |
| 3891 |
ecode += 3; |
ecode += 3; |
| 3892 |
break; |
break; |
| 3897 |
|
|
| 3898 |
if (*ecode == OP_KETRMIN) |
if (*ecode == OP_KETRMIN) |
| 3899 |
{ |
{ |
| 3900 |
if (match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr) || |
if (match(eptr, ecode+3, offset_top, md, ims, eptrb, 0) || |
| 3901 |
match(eptr, prev, offset_top, md, ims, FALSE, eptr)) return TRUE; |
match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup)) |
| 3902 |
|
return TRUE; |
| 3903 |
} |
} |
| 3904 |
else /* OP_KETRMAX */ |
else /* OP_KETRMAX */ |
| 3905 |
{ |
{ |
| 3906 |
if (match(eptr, prev, offset_top, md, ims, FALSE, eptr) || |
if (match(eptr, prev, offset_top, md, ims, eptrb, match_isgroup) || |
| 3907 |
match(eptr, ecode+3, offset_top, md, ims, FALSE, eptr)) return TRUE; |
match(eptr, ecode+3, offset_top, md, ims, eptrb, 0)) return TRUE; |
| 3908 |
} |
} |
| 3909 |
} |
} |
| 3910 |
return FALSE; |
return FALSE; |
| 3989 |
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n') |
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == '\n') |
| 3990 |
return FALSE; |
return FALSE; |
| 3991 |
if (eptr++ >= md->end_subject) return FALSE; |
if (eptr++ >= md->end_subject) return FALSE; |
| 3992 |
|
#ifdef SUPPORT_UTF8 |
| 3993 |
|
if (md->utf8) |
| 3994 |
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 3995 |
|
#endif |
| 3996 |
ecode++; |
ecode++; |
| 3997 |
break; |
break; |
| 3998 |
|
|
| 4119 |
{ |
{ |
| 4120 |
for (i = min;; i++) |
for (i = min;; i++) |
| 4121 |
{ |
{ |
| 4122 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) |
| 4123 |
return TRUE; |
return TRUE; |
| 4124 |
if (i >= max || !match_ref(offset, eptr, length, md, ims)) |
if (i >= max || !match_ref(offset, eptr, length, md, ims)) |
| 4125 |
return FALSE; |
return FALSE; |
| 4140 |
} |
} |
| 4141 |
while (eptr >= pp) |
while (eptr >= pp) |
| 4142 |
{ |
{ |
| 4143 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) |
| 4144 |
return TRUE; |
return TRUE; |
| 4145 |
eptr -= length; |
eptr -= length; |
| 4146 |
} |
} |
| 4194 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 4195 |
{ |
{ |
| 4196 |
if (eptr >= md->end_subject) return FALSE; |
if (eptr >= md->end_subject) return FALSE; |
| 4197 |
c = *eptr++; |
GETCHARINC(c, eptr) /* Get character; increment eptr */ |
| 4198 |
|
|
| 4199 |
|
#ifdef SUPPORT_UTF8 |
| 4200 |
|
/* We do not yet support class members > 255 */ |
| 4201 |
|
if (c > 255) return FALSE; |
| 4202 |
|
#endif |
| 4203 |
|
|
| 4204 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 4205 |
return FALSE; |
return FALSE; |
| 4206 |
} |
} |
| 4217 |
{ |
{ |
| 4218 |
for (i = min;; i++) |
for (i = min;; i++) |
| 4219 |
{ |
{ |
| 4220 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) |
| 4221 |
return TRUE; |
return TRUE; |
| 4222 |
if (i >= max || eptr >= md->end_subject) return FALSE; |
if (i >= max || eptr >= md->end_subject) return FALSE; |
| 4223 |
c = *eptr++; |
GETCHARINC(c, eptr) /* Get character; increment eptr */ |
| 4224 |
|
|
| 4225 |
|
#ifdef SUPPORT_UTF8 |
| 4226 |
|
/* We do not yet support class members > 255 */ |
| 4227 |
|
if (c > 255) return FALSE; |
| 4228 |
|
#endif |
| 4229 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
| 4230 |
return FALSE; |
return FALSE; |
| 4231 |
} |
} |
| 4237 |
else |
else |
| 4238 |
{ |
{ |
| 4239 |
const uschar *pp = eptr; |
const uschar *pp = eptr; |
| 4240 |
for (i = min; i < max; eptr++, i++) |
int len = 1; |
| 4241 |
|
for (i = min; i < max; i++) |
| 4242 |
{ |
{ |
| 4243 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
| 4244 |
c = *eptr; |
GETCHARLEN(c, eptr, len) /* Get character, set length if UTF-8 */ |
| 4245 |
if ((data[c/8] & (1 << (c&7))) != 0) continue; |
|
| 4246 |
break; |
#ifdef SUPPORT_UTF8 |
| 4247 |
|
/* We do not yet support class members > 255 */ |
| 4248 |
|
if (c > 255) break; |
| 4249 |
|
#endif |
| 4250 |
|
if ((data[c/8] & (1 << (c&7))) == 0) break; |
| 4251 |
|
eptr += len; |
| 4252 |
} |
} |
| 4253 |
|
|
| 4254 |
while (eptr >= pp) |
while (eptr >= pp) |
| 4255 |
if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb)) |
{ |
| 4256 |
|
if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) |
| 4257 |
return TRUE; |
return TRUE; |
| 4258 |
|
|
| 4259 |
|
#ifdef SUPPORT_UTF8 |
| 4260 |
|
BACKCHAR(eptr) |
| 4261 |
|
#endif |
| 4262 |
|
} |
| 4263 |
return FALSE; |
return FALSE; |
| 4264 |
} |
} |
| 4265 |
} |
} |
| 4355 |
{ |
{ |
| 4356 |
for (i = min;; i++) |
for (i = min;; i++) |
| 4357 |
{ |
{ |
| 4358 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) |
| 4359 |
return TRUE; |
return TRUE; |
| 4360 |
if (i >= max || eptr >= md->end_subject || |
if (i >= max || eptr >= md->end_subject || |
| 4361 |
c != md->lcc[*eptr++]) |
c != md->lcc[*eptr++]) |
| 4372 |
eptr++; |
eptr++; |
| 4373 |
} |
} |
| 4374 |
while (eptr >= pp) |
while (eptr >= pp) |
| 4375 |
if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) |
| 4376 |
return TRUE; |
return TRUE; |
| 4377 |
return FALSE; |
return FALSE; |
| 4378 |
} |
} |
| 4389 |
{ |
{ |
| 4390 |
for (i = min;; i++) |
for (i = min;; i++) |
| 4391 |
{ |
{ |
| 4392 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) |
| 4393 |
return TRUE; |
return TRUE; |
| 4394 |
if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE; |
if (i >= max || eptr >= md->end_subject || c != *eptr++) return FALSE; |
| 4395 |
} |
} |
| 4404 |
eptr++; |
eptr++; |
| 4405 |
} |
} |
| 4406 |
while (eptr >= pp) |
while (eptr >= pp) |
| 4407 |
if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) |
| 4408 |
return TRUE; |
return TRUE; |
| 4409 |
return FALSE; |
return FALSE; |
| 4410 |
} |
} |
| 4486 |
{ |
{ |
| 4487 |
for (i = min;; i++) |
for (i = min;; i++) |
| 4488 |
{ |
{ |
| 4489 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) |
| 4490 |
return TRUE; |
return TRUE; |
| 4491 |
if (i >= max || eptr >= md->end_subject || |
if (i >= max || eptr >= md->end_subject || |
| 4492 |
c == md->lcc[*eptr++]) |
c == md->lcc[*eptr++]) |
| 4503 |
eptr++; |
eptr++; |
| 4504 |
} |
} |
| 4505 |
while (eptr >= pp) |
while (eptr >= pp) |
| 4506 |
if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) |
| 4507 |
return TRUE; |
return TRUE; |
| 4508 |
return FALSE; |
return FALSE; |
| 4509 |
} |
} |
| 4520 |
{ |
{ |
| 4521 |
for (i = min;; i++) |
for (i = min;; i++) |
| 4522 |
{ |
{ |
| 4523 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) |
| 4524 |
return TRUE; |
return TRUE; |
| 4525 |
if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE; |
if (i >= max || eptr >= md->end_subject || c == *eptr++) return FALSE; |
| 4526 |
} |
} |
| 4535 |
eptr++; |
eptr++; |
| 4536 |
} |
} |
| 4537 |
while (eptr >= pp) |
while (eptr >= pp) |
| 4538 |
if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb)) |
if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) |
| 4539 |
return TRUE; |
return TRUE; |
| 4540 |
return FALSE; |
return FALSE; |
| 4541 |
} |
} |
| 4579 |
|
|
| 4580 |
/* First, ensure the minimum number of matches are present. Use inline |
/* First, ensure the minimum number of matches are present. Use inline |
| 4581 |
code for maximizing the speed, and do the type test once at the start |
code for maximizing the speed, and do the type test once at the start |
| 4582 |
(i.e. keep it out of the loop). Also test that there are at least the |
(i.e. keep it out of the loop). Also we can test that there are at least |
| 4583 |
minimum number of characters before we start. */ |
the minimum number of bytes before we start, except when doing '.' in |
| 4584 |
|
UTF8 mode. Leave the test in in all cases; in the special case we have |
| 4585 |
|
to test after each character. */ |
| 4586 |
|
|
| 4587 |
if (min > md->end_subject - eptr) return FALSE; |
if (min > md->end_subject - eptr) return FALSE; |
| 4588 |
if (min > 0) switch(ctype) |
if (min > 0) switch(ctype) |
| 4589 |
{ |
{ |
| 4590 |
case OP_ANY: |
case OP_ANY: |
| 4591 |
|
#ifdef SUPPORT_UTF8 |
| 4592 |
|
if (md->utf8) |
| 4593 |
|
{ |
| 4594 |
|
for (i = 1; i <= min; i++) |
| 4595 |
|
{ |
| 4596 |
|
if (eptr >= md->end_subject || |
| 4597 |
|
(*eptr++ == '\n' && (ims & PCRE_DOTALL) == 0)) |
| 4598 |
|
return FALSE; |
| 4599 |
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 4600 |
|
} |
| 4601 |
|
break; |
| 4602 |
|
} |
| 4603 |
|
#endif |
| 4604 |
|
/* Non-UTF8 can be faster */ |
| 4605 |
if ((ims & PCRE_DOTALL) == 0) |
if ((ims & PCRE_DOTALL) == 0) |
| 4606 |
{ for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; } |
{ for (i = 1; i <= min; i++) if (*eptr++ == '\n') return FALSE; } |
| 4607 |
else eptr += min; |
else eptr += min; |
| 4651 |
{ |
{ |
| 4652 |
for (i = min;; i++) |
for (i = min;; i++) |
| 4653 |
{ |
{ |
| 4654 |
if (match(eptr, ecode, offset_top, md, ims, FALSE, eptrb)) return TRUE; |
if (match(eptr, ecode, offset_top, md, ims, eptrb, 0)) return TRUE; |
| 4655 |
if (i >= max || eptr >= md->end_subject) return FALSE; |
if (i >= max || eptr >= md->end_subject) return FALSE; |
| 4656 |
|
|
| 4657 |
c = *eptr++; |
c = *eptr++; |
| 4659 |
{ |
{ |
| 4660 |
case OP_ANY: |
case OP_ANY: |
| 4661 |
if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; |
if ((ims & PCRE_DOTALL) == 0 && c == '\n') return FALSE; |
| 4662 |
|
#ifdef SUPPORT_UTF8 |
| 4663 |
|
if (md->utf8) |
| 4664 |
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 4665 |
|
#endif |
| 4666 |
break; |
break; |
| 4667 |
|
|
| 4668 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 4702 |
switch(ctype) |
switch(ctype) |
| 4703 |
{ |
{ |
| 4704 |
case OP_ANY: |
case OP_ANY: |
| 4705 |
|
|
| 4706 |
|
/* Special code is required for UTF8, but when the maximum is unlimited |
| 4707 |
|
we don't need it. */ |
| 4708 |
|
|
| 4709 |
|
#ifdef SUPPORT_UTF8 |
| 4710 |
|
if (md->utf8 && max < INT_MAX) |
| 4711 |
|
{ |
| 4712 |
|
if ((ims & PCRE_DOTALL) == 0) |
| 4713 |
|
{ |
| 4714 |
|
for (i = min; i < max; i++) |
| 4715 |
|
{ |
| 4716 |
|
if (eptr >= md->end_subject || *eptr++ == '\n') break; |
| 4717 |
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 4718 |
|
} |
| 4719 |
|
} |
| 4720 |
|
else |
| 4721 |
|
{ |
| 4722 |
|
for (i = min; i < max; i++) |
| 4723 |
|
{ |
| 4724 |
|
eptr++; |
| 4725 |
|
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 4726 |
|
} |
| 4727 |
|
} |
| 4728 |
|
break; |
| 4729 |
|
} |
| 4730 |
|
#endif |
| 4731 |
|
/* Non-UTF8 can be faster */ |
| 4732 |
if ((ims & PCRE_DOTALL) == 0) |
if ((ims & PCRE_DOTALL) == 0) |
| 4733 |
{ |
{ |
| 4734 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 4801 |
} |
} |
| 4802 |
|
|
| 4803 |
while (eptr >= pp) |
while (eptr >= pp) |
| 4804 |
if (match(eptr--, ecode, offset_top, md, ims, FALSE, eptrb)) |
{ |
| 4805 |
|
if (match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) |
| 4806 |
return TRUE; |
return TRUE; |
| 4807 |
|
#ifdef SUPPORT_UTF8 |
| 4808 |
|
if (md->utf8) |
| 4809 |
|
while (eptr > pp && (*eptr & 0xc0) == 0x80) eptr--; |
| 4810 |
|
#endif |
| 4811 |
|
} |
| 4812 |
return FALSE; |
return FALSE; |
| 4813 |
} |
} |
| 4814 |
/* Control never gets here */ |
/* Control never gets here */ |
| 4845 |
external_extra points to "hints" from pcre_study() or is NULL |
external_extra points to "hints" from pcre_study() or is NULL |
| 4846 |
subject points to the subject string |
subject points to the subject string |
| 4847 |
length length of subject string (may contain binary zeros) |
length length of subject string (may contain binary zeros) |
| 4848 |
|
start_offset where to start in the subject string |
| 4849 |
options option bits |
options option bits |
| 4850 |
offsets points to a vector of ints to be filled in with offsets |
offsets points to a vector of ints to be filled in with offsets |
| 4851 |
offsetcount the number of elements in the vector |
offsetcount the number of elements in the vector |
| 4858 |
|
|
| 4859 |
int |
int |
| 4860 |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
pcre_exec(const pcre *external_re, const pcre_extra *external_extra, |
| 4861 |
const char *subject, int length, int options, int *offsets, int offsetcount) |
const char *subject, int length, int start_offset, int options, int *offsets, |
| 4862 |
|
int offsetcount) |
| 4863 |
{ |
{ |
| 4864 |
int resetcount, ocount; |
int resetcount, ocount; |
| 4865 |
int first_char = -1; |
int first_char = -1; |
| 4866 |
int ims = 0; |
int req_char = -1; |
| 4867 |
|
int req_char2 = -1; |
| 4868 |
|
unsigned long int ims = 0; |
| 4869 |
match_data match_block; |
match_data match_block; |
| 4870 |
const uschar *start_bits = NULL; |
const uschar *start_bits = NULL; |
| 4871 |
const uschar *start_match = (const uschar *)subject; |
const uschar *start_match = (const uschar *)subject + start_offset; |
| 4872 |
const uschar *end_subject; |
const uschar *end_subject; |
| 4873 |
|
const uschar *req_char_ptr = start_match - 1; |
| 4874 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
| 4875 |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
const real_pcre_extra *extra = (const real_pcre_extra *)external_extra; |
| 4876 |
BOOL using_temporary_offsets = FALSE; |
BOOL using_temporary_offsets = FALSE; |
| 4883 |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
(offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL; |
| 4884 |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
if (re->magic_number != MAGIC_NUMBER) return PCRE_ERROR_BADMAGIC; |
| 4885 |
|
|
| 4886 |
|
match_block.start_pattern = re->code; |
| 4887 |
match_block.start_subject = (const uschar *)subject; |
match_block.start_subject = (const uschar *)subject; |
| 4888 |
match_block.end_subject = match_block.start_subject + length; |
match_block.end_subject = match_block.start_subject + length; |
| 4889 |
end_subject = match_block.end_subject; |
end_subject = match_block.end_subject; |
| 4890 |
|
|
| 4891 |
match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
match_block.endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0; |
| 4892 |
|
match_block.utf8 = (re->options & PCRE_UTF8) != 0; |
| 4893 |
|
|
| 4894 |
match_block.notbol = (options & PCRE_NOTBOL) != 0; |
match_block.notbol = (options & PCRE_NOTBOL) != 0; |
| 4895 |
match_block.noteol = (options & PCRE_NOTEOL) != 0; |
match_block.noteol = (options & PCRE_NOTEOL) != 0; |
| 4896 |
|
match_block.notempty = (options & PCRE_NOTEMPTY) != 0; |
| 4897 |
|
|
| 4898 |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
match_block.errorcode = PCRE_ERROR_NOMATCH; /* Default error */ |
| 4899 |
|
|
| 4964 |
start_bits = extra->start_bits; |
start_bits = extra->start_bits; |
| 4965 |
} |
} |
| 4966 |
|
|
| 4967 |
/* Loop for unanchored matches; for anchored regexps the loop runs just once. */ |
/* For anchored or unanchored matches, there may be a "last known required |
| 4968 |
|
character" set. If the PCRE_CASELESS is set, implying that the match starts |
| 4969 |
|
caselessly, or if there are any changes of this flag within the regex, set up |
| 4970 |
|
both cases of the character. Otherwise set the two values the same, which will |
| 4971 |
|
avoid duplicate testing (which takes significant time). This covers the vast |
| 4972 |
|
majority of cases. It will be suboptimal when the case flag changes in a regex |
| 4973 |
|
and the required character in fact is caseful. */ |
| 4974 |
|
|
| 4975 |
|
if ((re->options & PCRE_REQCHSET) != 0) |
| 4976 |
|
{ |
| 4977 |
|
req_char = re->req_char; |
| 4978 |
|
req_char2 = ((re->options & (PCRE_CASELESS | PCRE_ICHANGED)) != 0)? |
| 4979 |
|
(re->tables + fcc_offset)[req_char] : req_char; |
| 4980 |
|
} |
| 4981 |
|
|
| 4982 |
|
/* Loop for handling unanchored repeated matching attempts; for anchored regexs |
| 4983 |
|
the loop runs just once. */ |
| 4984 |
|
|
| 4985 |
do |
do |
| 4986 |
{ |
{ |
| 5009 |
|
|
| 5010 |
else if (startline) |
else if (startline) |
| 5011 |
{ |
{ |
| 5012 |
if (start_match > match_block.start_subject) |
if (start_match > match_block.start_subject + start_offset) |
| 5013 |
{ |
{ |
| 5014 |
while (start_match < end_subject && start_match[-1] != '\n') |
while (start_match < end_subject && start_match[-1] != '\n') |
| 5015 |
start_match++; |
start_match++; |
| 5016 |
} |
} |
| 5017 |
} |
} |
| 5018 |
|
|
| 5019 |
/* Or to a non-unique first char */ |
/* Or to a non-unique first char after study */ |
| 5020 |
|
|
| 5021 |
else if (start_bits != NULL) |
else if (start_bits != NULL) |
| 5022 |
{ |
{ |
| 5033 |
printf("\n"); |
printf("\n"); |
| 5034 |
#endif |
#endif |
| 5035 |
|
|
| 5036 |
|
/* If req_char is set, we know that that character must appear in the subject |
| 5037 |
|
for the match to succeed. If the first character is set, req_char must be |
| 5038 |
|
later in the subject; otherwise the test starts at the match point. This |
| 5039 |
|
optimization can save a huge amount of backtracking in patterns with nested |
| 5040 |
|
unlimited repeats that aren't going to match. We don't know what the state of |
| 5041 |
|
case matching may be when this character is hit, so test for it in both its |
| 5042 |
|
cases if necessary. However, the different cased versions will not be set up |
| 5043 |
|
unless PCRE_CASELESS was given or the casing state changes within the regex. |
| 5044 |
|
Writing separate code makes it go faster, as does using an autoincrement and |
| 5045 |
|
backing off on a match. */ |
| 5046 |
|
|
| 5047 |
|
if (req_char >= 0) |
| 5048 |
|
{ |
| 5049 |
|
register const uschar *p = start_match + ((first_char >= 0)? 1 : 0); |
| 5050 |
|
|
| 5051 |
|
/* We don't need to repeat the search if we haven't yet reached the |
| 5052 |
|
place we found it at last time. */ |
| 5053 |
|
|
| 5054 |
|
if (p > req_char_ptr) |
| 5055 |
|
{ |
| 5056 |
|
/* Do a single test if no case difference is set up */ |
| 5057 |
|
|
| 5058 |
|
if (req_char == req_char2) |
| 5059 |
|
{ |
| 5060 |
|
while (p < end_subject) |
| 5061 |
|
{ |
| 5062 |
|
if (*p++ == req_char) { p--; break; } |
| 5063 |
|
} |
| 5064 |
|
} |
| 5065 |
|
|
| 5066 |
|
/* Otherwise test for either case */ |
| 5067 |
|
|
| 5068 |
|
else |
| 5069 |
|
{ |
| 5070 |
|
while (p < end_subject) |
| 5071 |
|
{ |
| 5072 |
|
register int pp = *p++; |
| 5073 |
|
if (pp == req_char || pp == req_char2) { p--; break; } |
| 5074 |
|
} |
| 5075 |
|
} |
| 5076 |
|
|
| 5077 |
|
/* If we can't find the required character, break the matching loop */ |
| 5078 |
|
|
| 5079 |
|
if (p >= end_subject) break; |
| 5080 |
|
|
| 5081 |
|
/* If we have found the required character, save the point where we |
| 5082 |
|
found it, so that we don't search again next time round the loop if |
| 5083 |
|
the start hasn't passed this character yet. */ |
| 5084 |
|
|
| 5085 |
|
req_char_ptr = p; |
| 5086 |
|
} |
| 5087 |
|
} |
| 5088 |
|
|
| 5089 |
/* When a match occurs, substrings will be set for all internal extractions; |
/* When a match occurs, substrings will be set for all internal extractions; |
| 5090 |
we just need to set up the whole thing as substring 0 before returning. If |
we just need to set up the whole thing as substring 0 before returning. If |
| 5091 |
there were too many extractions, set the return code to zero. In the case |
there were too many extractions, set the return code to zero. In the case |
| 5093 |
those back references that we can. In this case there need not be overflow |
those back references that we can. In this case there need not be overflow |
| 5094 |
if certain parts of the pattern were not used. */ |
if certain parts of the pattern were not used. */ |
| 5095 |
|
|
| 5096 |
if (!match(start_match, re->code, 2, &match_block, ims, FALSE, start_match)) |
match_block.start_match = start_match; |
| 5097 |
|
if (!match(start_match, re->code, 2, &match_block, ims, NULL, match_isgroup)) |
| 5098 |
continue; |
continue; |
| 5099 |
|
|
| 5100 |
/* Copy the offset information from temporary store if necessary */ |
/* Copy the offset information from temporary store if necessary */ |