| 32 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 33 |
*/ |
*/ |
| 34 |
|
|
|
/* Define DEBUG to get debugging output on stdout. */ |
|
| 35 |
|
|
| 36 |
|
/* Define DEBUG to get debugging output on stdout. */ |
| 37 |
/* #define DEBUG */ |
/* #define DEBUG */ |
| 38 |
|
|
| 39 |
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef |
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef |
| 46 |
#define DPRINTF(p) /*nothing*/ |
#define DPRINTF(p) /*nothing*/ |
| 47 |
#endif |
#endif |
| 48 |
|
|
| 49 |
/* Include the internals header, which itself includes Standard C headers plus |
/* Include the internals header, which itself includes "config.h", the Standard |
| 50 |
the external pcre header. */ |
C headers, and the external pcre header. */ |
| 51 |
|
|
| 52 |
#include "internal.h" |
#include "internal.h" |
| 53 |
|
|
| 92 |
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
/* Table of sizes for the fixed-length opcodes. It's defined in a macro so that |
| 93 |
the definition is next to the definition of the opcodes in internal.h. */ |
the definition is next to the definition of the opcodes in internal.h. */ |
| 94 |
|
|
| 95 |
static uschar OP_lengths[] = { OP_LENGTHS }; |
static const uschar OP_lengths[] = { OP_LENGTHS }; |
| 96 |
|
|
| 97 |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
/* Min and max values for the common repeats; for the maxima, 0 => infinity */ |
| 98 |
|
|
| 104 |
on. Zero means further processing is needed (for things like \x), or the escape |
on. Zero means further processing is needed (for things like \x), or the escape |
| 105 |
is invalid. */ |
is invalid. */ |
| 106 |
|
|
| 107 |
|
#if !EBCDIC /* This is the "normal" table for ASCII systems */ |
| 108 |
static const short int escapes[] = { |
static const short int escapes[] = { |
| 109 |
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ |
0, 0, 0, 0, 0, 0, 0, 0, /* 0 - 7 */ |
| 110 |
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ |
0, 0, ':', ';', '<', '=', '>', '?', /* 8 - ? */ |
| 118 |
0, 0, -ESC_z /* x - z */ |
0, 0, -ESC_z /* x - z */ |
| 119 |
}; |
}; |
| 120 |
|
|
| 121 |
|
#else /* This is the "abnormal" table for EBCDIC systems */ |
| 122 |
|
static const short int escapes[] = { |
| 123 |
|
/* 48 */ 0, 0, 0, '.', '<', '(', '+', '|', |
| 124 |
|
/* 50 */ '&', 0, 0, 0, 0, 0, 0, 0, |
| 125 |
|
/* 58 */ 0, 0, '!', '$', '*', ')', ';', '~', |
| 126 |
|
/* 60 */ '-', '/', 0, 0, 0, 0, 0, 0, |
| 127 |
|
/* 68 */ 0, 0, '|', ',', '%', '_', '>', '?', |
| 128 |
|
/* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, |
| 129 |
|
/* 78 */ 0, '`', ':', '#', '@', '\'', '=', '"', |
| 130 |
|
/* 80 */ 0, 7, -ESC_b, 0, -ESC_d, ESC_e, ESC_f, 0, |
| 131 |
|
/* 88 */ 0, 0, 0, '{', 0, 0, 0, 0, |
| 132 |
|
/* 90 */ 0, 0, 0, 'l', 0, ESC_n, 0, 0, |
| 133 |
|
/* 98 */ 0, ESC_r, 0, '}', 0, 0, 0, 0, |
| 134 |
|
/* A0 */ 0, '~', -ESC_s, ESC_tee, 0, 0, -ESC_w, 0, |
| 135 |
|
/* A8 */ 0,-ESC_z, 0, 0, 0, '[', 0, 0, |
| 136 |
|
/* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, |
| 137 |
|
/* B8 */ 0, 0, 0, 0, 0, ']', '=', '-', |
| 138 |
|
/* C0 */ '{',-ESC_A, -ESC_B, -ESC_C, -ESC_D,-ESC_E, 0, -ESC_G, |
| 139 |
|
/* C8 */ 0, 0, 0, 0, 0, 0, 0, 0, |
| 140 |
|
/* D0 */ '}', 0, 0, 0, 0, 0, 0, 0, |
| 141 |
|
/* D8 */-ESC_Q, 0, 0, 0, 0, 0, 0, 0, |
| 142 |
|
/* E0 */ '\\', 0, -ESC_S, 0, 0, 0, -ESC_W, 0, |
| 143 |
|
/* E8 */ 0,-ESC_Z, 0, 0, 0, 0, 0, 0, |
| 144 |
|
/* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, |
| 145 |
|
/* F8 */ 0, 0, 0, 0, 0, 0, 0, 0 |
| 146 |
|
}; |
| 147 |
|
#endif |
| 148 |
|
|
| 149 |
|
|
| 150 |
/* Tables of names of POSIX character classes and their lengths. The list is |
/* Tables of names of POSIX character classes and their lengths. The list is |
| 151 |
terminated by a zero length entry. The first three must be alpha, upper, lower, |
terminated by a zero length entry. The first three must be alpha, upper, lower, |
| 152 |
as this is assumed for handling case independence. */ |
as this is assumed for handling case independence. */ |
| 153 |
|
|
| 154 |
static const char *posix_names[] = { |
static const char *const posix_names[] = { |
| 155 |
"alpha", "lower", "upper", |
"alpha", "lower", "upper", |
| 156 |
"alnum", "ascii", "blank", "cntrl", "digit", "graph", |
"alnum", "ascii", "blank", "cntrl", "digit", "graph", |
| 157 |
"print", "punct", "space", "word", "xdigit" }; |
"print", "punct", "space", "word", "xdigit" }; |
| 180 |
cbit_xdigit,-1, -1 /* xdigit */ |
cbit_xdigit,-1, -1 /* xdigit */ |
| 181 |
}; |
}; |
| 182 |
|
|
| 183 |
/* Table to identify ASCII digits and hex digits. This is used when compiling |
/* Table to identify digits and hex digits. This is used when compiling |
| 184 |
patterns. Note that the tables in chartables are dependent on the locale, and |
patterns. Note that the tables in chartables are dependent on the locale, and |
| 185 |
may mark arbitrary characters as digits - but the PCRE compiling code expects |
may mark arbitrary characters as digits - but the PCRE compiling code expects |
| 186 |
to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have |
to handle only 0-9, a-z, and A-Z as digits when compiling. That is why we have |
| 196 |
|
|
| 197 |
Then we can use ctype_digit and ctype_xdigit in the code. */ |
Then we can use ctype_digit and ctype_xdigit in the code. */ |
| 198 |
|
|
| 199 |
|
#if !EBCDIC /* This is the "normal" case, for ASCII systems */ |
| 200 |
static const unsigned char digitab[] = |
static const unsigned char digitab[] = |
| 201 |
{ |
{ |
| 202 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */ |
| 232 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */ |
| 233 |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ |
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */ |
| 234 |
|
|
| 235 |
|
#else /* This is the "abnormal" case, for EBCDIC systems */ |
| 236 |
|
static const unsigned char digitab[] = |
| 237 |
|
{ |
| 238 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 0 */ |
| 239 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 8- 15 */ |
| 240 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 10 */ |
| 241 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ |
| 242 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 32- 39 20 */ |
| 243 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ |
| 244 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 30 */ |
| 245 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ |
| 246 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 40 */ |
| 247 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 72- | */ |
| 248 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 50 */ |
| 249 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 88- ¬ */ |
| 250 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 60 */ |
| 251 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ? */ |
| 252 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */ |
| 253 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ |
| 254 |
|
0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* 128- g 80 */ |
| 255 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ |
| 256 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144- p 90 */ |
| 257 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ |
| 258 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 160- x A0 */ |
| 259 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ |
| 260 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 B0 */ |
| 261 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ |
| 262 |
|
0x00,0x08,0x08,0x08,0x08,0x08,0x08,0x00, /* { - G C0 */ |
| 263 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ |
| 264 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* } - P D0 */ |
| 265 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ |
| 266 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* \ - X E0 */ |
| 267 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ |
| 268 |
|
0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c,0x0c, /* 0 - 7 F0 */ |
| 269 |
|
0x0c,0x0c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ |
| 270 |
|
|
| 271 |
|
static const unsigned char ebcdic_chartab[] = { /* chartable partial dup */ |
| 272 |
|
0x80,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 0- 7 */ |
| 273 |
|
0x00,0x00,0x00,0x00,0x01,0x01,0x00,0x00, /* 8- 15 */ |
| 274 |
|
0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 16- 23 */ |
| 275 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */ |
| 276 |
|
0x00,0x00,0x00,0x00,0x00,0x01,0x00,0x00, /* 32- 39 */ |
| 277 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 40- 47 */ |
| 278 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 48- 55 */ |
| 279 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 56- 63 */ |
| 280 |
|
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - 71 */ |
| 281 |
|
0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /* 72- | */ |
| 282 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* & - 87 */ |
| 283 |
|
0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /* 88- ¬ */ |
| 284 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - -103 */ |
| 285 |
|
0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ? */ |
| 286 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */ |
| 287 |
|
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 120- " */ |
| 288 |
|
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* 128- g */ |
| 289 |
|
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* h -143 */ |
| 290 |
|
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* 144- p */ |
| 291 |
|
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* q -159 */ |
| 292 |
|
0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* 160- x */ |
| 293 |
|
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* y -175 */ |
| 294 |
|
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ^ -183 */ |
| 295 |
|
0x00,0x00,0x80,0x00,0x00,0x00,0x00,0x00, /* 184-191 */ |
| 296 |
|
0x80,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* { - G */ |
| 297 |
|
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* H -207 */ |
| 298 |
|
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* } - P */ |
| 299 |
|
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Q -223 */ |
| 300 |
|
0x00,0x00,0x12,0x12,0x12,0x12,0x12,0x12, /* \ - X */ |
| 301 |
|
0x12,0x12,0x00,0x00,0x00,0x00,0x00,0x00, /* Y -239 */ |
| 302 |
|
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */ |
| 303 |
|
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x00};/* 8 -255 */ |
| 304 |
|
#endif |
| 305 |
|
|
| 306 |
|
|
| 307 |
/* Definition to allow mutual recursion */ |
/* Definition to allow mutual recursion */ |
| 308 |
|
|
| 309 |
static BOOL |
static BOOL |
| 313 |
/* Structure for building a chain of data that actually lives on the |
/* Structure for building a chain of data that actually lives on the |
| 314 |
stack, for holding the values of the subject pointer at the start of each |
stack, for holding the values of the subject pointer at the start of each |
| 315 |
subpattern, so as to detect when an empty string has been matched by a |
subpattern, so as to detect when an empty string has been matched by a |
| 316 |
subpattern - to break infinite loops. */ |
subpattern - to break infinite loops. When NO_RECURSE is set, these blocks |
| 317 |
|
are on the heap, not on the stack. */ |
| 318 |
|
|
| 319 |
typedef struct eptrblock { |
typedef struct eptrblock { |
| 320 |
struct eptrblock *prev; |
struct eptrblock *epb_prev; |
| 321 |
const uschar *saved_eptr; |
const uschar *epb_saved_eptr; |
| 322 |
} eptrblock; |
} eptrblock; |
| 323 |
|
|
| 324 |
/* Flag bits for the match() function */ |
/* Flag bits for the match() function */ |
| 339 |
*************************************************/ |
*************************************************/ |
| 340 |
|
|
| 341 |
/* PCRE is thread-clean and doesn't use any global variables in the normal |
/* PCRE is thread-clean and doesn't use any global variables in the normal |
| 342 |
sense. However, it calls memory allocation and free functions via the two |
sense. However, it calls memory allocation and free functions via the four |
| 343 |
indirections below, and it can optionally do callouts. These values can be |
indirections below, and it can optionally do callouts. These values can be |
| 344 |
changed by the caller, but are shared between all threads. However, when |
changed by the caller, but are shared between all threads. However, when |
| 345 |
compiling for Virtual Pascal, things are done differently (see pcre.in). */ |
compiling for Virtual Pascal, things are done differently (see pcre.in). */ |
| 348 |
#ifdef __cplusplus |
#ifdef __cplusplus |
| 349 |
extern "C" void *(*pcre_malloc)(size_t) = malloc; |
extern "C" void *(*pcre_malloc)(size_t) = malloc; |
| 350 |
extern "C" void (*pcre_free)(void *) = free; |
extern "C" void (*pcre_free)(void *) = free; |
| 351 |
|
extern "C" void *(*pcre_stack_malloc)(size_t) = malloc; |
| 352 |
|
extern "C" void (*pcre_stack_free)(void *) = free; |
| 353 |
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL; |
extern "C" int (*pcre_callout)(pcre_callout_block *) = NULL; |
| 354 |
#else |
#else |
| 355 |
void *(*pcre_malloc)(size_t) = malloc; |
void *(*pcre_malloc)(size_t) = malloc; |
| 356 |
void (*pcre_free)(void *) = free; |
void (*pcre_free)(void *) = free; |
| 357 |
|
void *(*pcre_stack_malloc)(size_t) = malloc; |
| 358 |
|
void (*pcre_stack_free)(void *) = free; |
| 359 |
int (*pcre_callout)(pcre_callout_block *) = NULL; |
int (*pcre_callout)(pcre_callout_block *) = NULL; |
| 360 |
#endif |
#endif |
| 361 |
#endif |
#endif |
| 552 |
#define STRING(a) # a |
#define STRING(a) # a |
| 553 |
#define XSTRING(s) STRING(s) |
#define XSTRING(s) STRING(s) |
| 554 |
|
|
| 555 |
const char * |
EXPORT const char * |
| 556 |
pcre_version(void) |
pcre_version(void) |
| 557 |
{ |
{ |
| 558 |
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE); |
return XSTRING(PCRE_MAJOR) "." XSTRING(PCRE_MINOR) " " XSTRING(PCRE_DATE); |
| 583 |
or negative values on error |
or negative values on error |
| 584 |
*/ |
*/ |
| 585 |
|
|
| 586 |
int |
EXPORT int |
| 587 |
pcre_info(const pcre *external_re, int *optptr, int *first_byte) |
pcre_info(const pcre *external_re, int *optptr, int *first_byte) |
| 588 |
{ |
{ |
| 589 |
const real_pcre *re = (const real_pcre *)external_re; |
const real_pcre *re = (const real_pcre *)external_re; |
| 614 |
Returns: 0 if data returned, negative on error |
Returns: 0 if data returned, negative on error |
| 615 |
*/ |
*/ |
| 616 |
|
|
| 617 |
int |
EXPORT int |
| 618 |
pcre_fullinfo(const pcre *external_re, const pcre_extra *extra_data, int what, |
pcre_fullinfo(const pcre *external_re, const pcre_extra *extra_data, int what, |
| 619 |
void *where) |
void *where) |
| 620 |
{ |
{ |
| 700 |
Returns: 0 if data returned, negative on error |
Returns: 0 if data returned, negative on error |
| 701 |
*/ |
*/ |
| 702 |
|
|
| 703 |
int |
EXPORT int |
| 704 |
pcre_config(int what, void *where) |
pcre_config(int what, void *where) |
| 705 |
{ |
{ |
| 706 |
switch (what) |
switch (what) |
| 729 |
*((unsigned int *)where) = MATCH_LIMIT; |
*((unsigned int *)where) = MATCH_LIMIT; |
| 730 |
break; |
break; |
| 731 |
|
|
| 732 |
|
case PCRE_CONFIG_STACKRECURSE: |
| 733 |
|
#ifdef NO_RECURSE |
| 734 |
|
*((int *)where) = 0; |
| 735 |
|
#else |
| 736 |
|
*((int *)where) = 1; |
| 737 |
|
#endif |
| 738 |
|
break; |
| 739 |
|
|
| 740 |
default: return PCRE_ERROR_BADOPTION; |
default: return PCRE_ERROR_BADOPTION; |
| 741 |
} |
} |
| 742 |
|
|
| 809 |
c = *(++ptr); |
c = *(++ptr); |
| 810 |
if (c == 0) *errorptr = ERR1; |
if (c == 0) *errorptr = ERR1; |
| 811 |
|
|
| 812 |
/* Digits or letters may have special meaning; all others are literals. */ |
/* Non-alphamerics are literals. For digits or letters, do an initial lookup in |
| 813 |
|
a table. A non-zero result is something that can be returned immediately. |
| 814 |
else if (c < '0' || c > 'z') {} |
Otherwise further processing may be required. */ |
|
|
|
|
/* Do an initial lookup in a table. A non-zero result is something that can be |
|
|
returned immediately. Otherwise further processing may be required. */ |
|
| 815 |
|
|
| 816 |
|
#if !EBCDIC /* ASCII coding */ |
| 817 |
|
else if (c < '0' || c > 'z') {} /* Not alphameric */ |
| 818 |
else if ((i = escapes[c - '0']) != 0) c = i; |
else if ((i = escapes[c - '0']) != 0) c = i; |
| 819 |
|
|
| 820 |
|
#else /* EBCDIC coding */ |
| 821 |
|
else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {} /* Not alphameric */ |
| 822 |
|
else if ((i = escapes[c - 0x48]) != 0) c = i; |
| 823 |
|
#endif |
| 824 |
|
|
| 825 |
/* Escapes that need further processing, or are illegal. */ |
/* Escapes that need further processing, or are illegal. */ |
| 826 |
|
|
| 827 |
else |
else |
| 906 |
while ((digitab[*pt] & ctype_xdigit) != 0) |
while ((digitab[*pt] & ctype_xdigit) != 0) |
| 907 |
{ |
{ |
| 908 |
int cc = *pt++; |
int cc = *pt++; |
|
if (cc >= 'a') cc -= 32; /* Convert to upper case */ |
|
| 909 |
count++; |
count++; |
| 910 |
|
#if !EBCDIC /* ASCII coding */ |
| 911 |
|
if (cc >= 'a') cc -= 32; /* Convert to upper case */ |
| 912 |
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); |
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); |
| 913 |
|
#else /* EBCDIC coding */ |
| 914 |
|
if (cc >= 'a' && cc <= 'z') cc += 64; /* Convert to upper case */ |
| 915 |
|
c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10)); |
| 916 |
|
#endif |
| 917 |
} |
} |
| 918 |
if (*pt == '}') |
if (*pt == '}') |
| 919 |
{ |
{ |
| 933 |
{ |
{ |
| 934 |
int cc; /* Some compilers don't like ++ */ |
int cc; /* Some compilers don't like ++ */ |
| 935 |
cc = *(++ptr); /* in initializers */ |
cc = *(++ptr); /* in initializers */ |
| 936 |
|
#if !EBCDIC /* ASCII coding */ |
| 937 |
if (cc >= 'a') cc -= 32; /* Convert to upper case */ |
if (cc >= 'a') cc -= 32; /* Convert to upper case */ |
| 938 |
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); |
c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10)); |
| 939 |
|
#else /* EBCDIC coding */ |
| 940 |
|
if (cc <= 'z') cc += 64; /* Convert to upper case */ |
| 941 |
|
c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10)); |
| 942 |
|
#endif |
| 943 |
} |
} |
| 944 |
break; |
break; |
| 945 |
|
|
| 954 |
} |
} |
| 955 |
|
|
| 956 |
/* A letter is upper-cased; then the 0x40 bit is flipped. This coding |
/* A letter is upper-cased; then the 0x40 bit is flipped. This coding |
| 957 |
is ASCII-specific, but then the whole concept of \cx is ASCII-specific. */ |
is ASCII-specific, but then the whole concept of \cx is ASCII-specific. |
| 958 |
|
(However, an EBCDIC equivalent has now been added.) */ |
| 959 |
|
|
| 960 |
|
#if !EBCDIC /* ASCII coding */ |
| 961 |
if (c >= 'a' && c <= 'z') c -= 32; |
if (c >= 'a' && c <= 'z') c -= 32; |
| 962 |
c ^= 0x40; |
c ^= 0x40; |
| 963 |
|
#else /* EBCDIC coding */ |
| 964 |
|
if (c >= 'a' && c <= 'z') c += 64; |
| 965 |
|
c ^= 0xC0; |
| 966 |
|
#endif |
| 967 |
break; |
break; |
| 968 |
|
|
| 969 |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
/* PCRE_EXTRA enables extensions to Perl in the matter of escapes. Any |
| 1007 |
static BOOL |
static BOOL |
| 1008 |
is_counted_repeat(const uschar *p) |
is_counted_repeat(const uschar *p) |
| 1009 |
{ |
{ |
| 1010 |
if ((digitab[*p++] && ctype_digit) == 0) return FALSE; |
if ((digitab[*p++] & ctype_digit) == 0) return FALSE; |
| 1011 |
while ((digitab[*p] & ctype_digit) != 0) p++; |
while ((digitab[*p] & ctype_digit) != 0) p++; |
| 1012 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
| 1013 |
|
|
| 1014 |
if (*p++ != ',') return FALSE; |
if (*p++ != ',') return FALSE; |
| 1015 |
if (*p == '}') return TRUE; |
if (*p == '}') return TRUE; |
| 1016 |
|
|
| 1017 |
if ((digitab[*p++] && ctype_digit) == 0) return FALSE; |
if ((digitab[*p++] & ctype_digit) == 0) return FALSE; |
| 1018 |
while ((digitab[*p] & ctype_digit) != 0) p++; |
while ((digitab[*p] & ctype_digit) != 0) p++; |
| 1019 |
|
|
| 1020 |
return (*p == '}'); |
return (*p == '}'); |
| 1354 |
{ |
{ |
| 1355 |
code += OP_lengths[c]; |
code += OP_lengths[c]; |
| 1356 |
|
|
| 1357 |
|
#ifdef SUPPORT_UTF8 |
| 1358 |
|
|
| 1359 |
/* In UTF-8 mode, opcodes that are followed by a character may be followed |
/* In UTF-8 mode, opcodes that are followed by a character may be followed |
| 1360 |
by a multi-byte character. The length in the table is a minimum, so we have |
by a multi-byte character. The length in the table is a minimum, so we have |
| 1361 |
to scan along to skip the extra characters. All opcodes are less than 128, |
to scan along to skip the extra characters. All opcodes are less than 128, |
| 1362 |
so we can use relatively efficient code. */ |
so we can use relatively efficient code. */ |
| 1363 |
|
|
| 1364 |
|
if (utf8) switch(c) |
| 1365 |
|
{ |
| 1366 |
|
case OP_EXACT: |
| 1367 |
|
case OP_UPTO: |
| 1368 |
|
case OP_MINUPTO: |
| 1369 |
|
case OP_STAR: |
| 1370 |
|
case OP_MINSTAR: |
| 1371 |
|
case OP_PLUS: |
| 1372 |
|
case OP_MINPLUS: |
| 1373 |
|
case OP_QUERY: |
| 1374 |
|
case OP_MINQUERY: |
| 1375 |
|
while ((*code & 0xc0) == 0x80) code++; |
| 1376 |
|
break; |
| 1377 |
|
|
| 1378 |
|
/* XCLASS is used for classes that cannot be represented just by a bit |
| 1379 |
|
map. This includes negated single high-valued characters. The length in |
| 1380 |
|
the table is zero; the actual length is stored in the compiled code. */ |
| 1381 |
|
|
| 1382 |
|
case OP_XCLASS: |
| 1383 |
|
code += GET(code, 1) + 1; |
| 1384 |
|
break; |
| 1385 |
|
} |
| 1386 |
|
#endif |
| 1387 |
|
} |
| 1388 |
|
} |
| 1389 |
|
} |
| 1390 |
|
|
| 1391 |
|
|
| 1392 |
|
|
| 1393 |
|
/************************************************* |
| 1394 |
|
* Scan compiled regex for recursion reference * |
| 1395 |
|
*************************************************/ |
| 1396 |
|
|
| 1397 |
|
/* This little function scans through a compiled pattern until it finds an |
| 1398 |
|
instance of OP_RECURSE. |
| 1399 |
|
|
| 1400 |
|
Arguments: |
| 1401 |
|
code points to start of expression |
| 1402 |
|
utf8 TRUE in UTF-8 mode |
| 1403 |
|
|
| 1404 |
|
Returns: pointer to the opcode for OP_RECURSE, or NULL if not found |
| 1405 |
|
*/ |
| 1406 |
|
|
| 1407 |
|
static const uschar * |
| 1408 |
|
find_recurse(const uschar *code, BOOL utf8) |
| 1409 |
|
{ |
| 1410 |
|
#ifndef SUPPORT_UTF8 |
| 1411 |
|
utf8 = utf8; /* Stop pedantic compilers complaining */ |
| 1412 |
|
#endif |
| 1413 |
|
|
| 1414 |
|
for (;;) |
| 1415 |
|
{ |
| 1416 |
|
register int c = *code; |
| 1417 |
|
if (c == OP_END) return NULL; |
| 1418 |
|
else if (c == OP_RECURSE) return code; |
| 1419 |
|
else if (c == OP_CHARS) code += code[1] + OP_lengths[c]; |
| 1420 |
|
else if (c > OP_BRA) |
| 1421 |
|
{ |
| 1422 |
|
code += OP_lengths[OP_BRA]; |
| 1423 |
|
} |
| 1424 |
|
else |
| 1425 |
|
{ |
| 1426 |
|
code += OP_lengths[c]; |
| 1427 |
|
|
| 1428 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 1429 |
|
|
| 1430 |
|
/* In UTF-8 mode, opcodes that are followed by a character may be followed |
| 1431 |
|
by a multi-byte character. The length in the table is a minimum, so we have |
| 1432 |
|
to scan along to skip the extra characters. All opcodes are less than 128, |
| 1433 |
|
so we can use relatively efficient code. */ |
| 1434 |
|
|
| 1435 |
if (utf8) switch(c) |
if (utf8) switch(c) |
| 1436 |
{ |
{ |
| 1437 |
case OP_EXACT: |
case OP_EXACT: |
| 1445 |
case OP_MINQUERY: |
case OP_MINQUERY: |
| 1446 |
while ((*code & 0xc0) == 0x80) code++; |
while ((*code & 0xc0) == 0x80) code++; |
| 1447 |
break; |
break; |
| 1448 |
|
|
| 1449 |
|
/* XCLASS is used for classes that cannot be represented just by a bit |
| 1450 |
|
map. This includes negated single high-valued characters. The length in |
| 1451 |
|
the table is zero; the actual length is stored in the compiled code. */ |
| 1452 |
|
|
| 1453 |
|
case OP_XCLASS: |
| 1454 |
|
code += GET(code, 1) + 1; |
| 1455 |
|
break; |
| 1456 |
} |
} |
| 1457 |
#endif |
#endif |
| 1458 |
} |
} |
| 1696 |
} |
} |
| 1697 |
|
|
| 1698 |
|
|
| 1699 |
|
/************************************************* |
| 1700 |
|
* Adjust OP_RECURSE items in repeated group * |
| 1701 |
|
*************************************************/ |
| 1702 |
|
|
| 1703 |
|
/* OP_RECURSE items contain an offset from the start of the regex to the group |
| 1704 |
|
that is referenced. This means that groups can be replicated for fixed |
| 1705 |
|
repetition simply by copying (because the recursion is allowed to refer to |
| 1706 |
|
earlier groups that are outside the current group). However, when a group is |
| 1707 |
|
optional (i.e. the minimum quantifier is zero), OP_BRAZERO is inserted before |
| 1708 |
|
it, after it has been compiled. This means that any OP_RECURSE items within it |
| 1709 |
|
that refer to the group itself or any contained groups have to have their |
| 1710 |
|
offsets adjusted. That is the job of this function. Before it is called, the |
| 1711 |
|
partially compiled regex must be temporarily terminated with OP_END. |
| 1712 |
|
|
| 1713 |
|
Arguments: |
| 1714 |
|
group points to the start of the group |
| 1715 |
|
adjust the amount by which the group is to be moved |
| 1716 |
|
utf8 TRUE in UTF-8 mode |
| 1717 |
|
cd contains pointers to tables etc. |
| 1718 |
|
|
| 1719 |
|
Returns: nothing |
| 1720 |
|
*/ |
| 1721 |
|
|
| 1722 |
|
static void |
| 1723 |
|
adjust_recurse(uschar *group, int adjust, BOOL utf8, compile_data *cd) |
| 1724 |
|
{ |
| 1725 |
|
uschar *ptr = group; |
| 1726 |
|
while ((ptr = (uschar *)find_recurse(ptr, utf8)) != NULL) |
| 1727 |
|
{ |
| 1728 |
|
int offset = GET(ptr, 1); |
| 1729 |
|
if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust); |
| 1730 |
|
ptr += 1 + LINK_SIZE; |
| 1731 |
|
} |
| 1732 |
|
} |
| 1733 |
|
|
| 1734 |
|
|
| 1735 |
|
|
| 1736 |
/************************************************* |
/************************************************* |
| 2017 |
posix_class *= 3; |
posix_class *= 3; |
| 2018 |
for (i = 0; i < 3; i++) |
for (i = 0; i < 3; i++) |
| 2019 |
{ |
{ |
| 2020 |
BOOL isblank = strncmp((char *)ptr, "blank", 5) == 0; |
BOOL blankclass = strncmp((char *)ptr, "blank", 5) == 0; |
| 2021 |
int taboffset = posix_class_maps[posix_class + i]; |
int taboffset = posix_class_maps[posix_class + i]; |
| 2022 |
if (taboffset < 0) break; |
if (taboffset < 0) break; |
| 2023 |
if (local_negate) |
if (local_negate) |
| 2024 |
{ |
{ |
| 2025 |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset]; |
for (c = 0; c < 32; c++) class[c] |= ~cbits[c+taboffset]; |
| 2026 |
if (isblank) class[1] |= 0x3c; |
if (blankclass) class[1] |= 0x3c; |
| 2027 |
} |
} |
| 2028 |
else |
else |
| 2029 |
{ |
{ |
| 2030 |
for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset]; |
for (c = 0; c < 32; c++) class[c] |= cbits[c+taboffset]; |
| 2031 |
if (isblank) class[1] &= ~0x3c; |
if (blankclass) class[1] &= ~0x3c; |
| 2032 |
} |
} |
| 2033 |
} |
} |
| 2034 |
|
|
| 2693 |
} |
} |
| 2694 |
|
|
| 2695 |
/* If the maximum is 1 or unlimited, we just have to stick in the |
/* If the maximum is 1 or unlimited, we just have to stick in the |
| 2696 |
BRAZERO and do no more at this point. */ |
BRAZERO and do no more at this point. However, we do need to adjust |
| 2697 |
|
any OP_RECURSE calls inside the group that refer to the group itself or |
| 2698 |
|
any internal group, because the offset is from the start of the whole |
| 2699 |
|
regex. Temporarily terminate the pattern while doing this. */ |
| 2700 |
|
|
| 2701 |
if (repeat_max <= 1) |
if (repeat_max <= 1) |
| 2702 |
{ |
{ |
| 2703 |
|
*code = OP_END; |
| 2704 |
|
adjust_recurse(previous, 1, utf8, cd); |
| 2705 |
memmove(previous+1, previous, len); |
memmove(previous+1, previous, len); |
| 2706 |
code++; |
code++; |
| 2707 |
*previous++ = OP_BRAZERO + repeat_type; |
*previous++ = OP_BRAZERO + repeat_type; |
| 2711 |
in a nested fashion, sticking OP_BRAZERO before each set of brackets. |
in a nested fashion, sticking OP_BRAZERO before each set of brackets. |
| 2712 |
The first one has to be handled carefully because it's the original |
The first one has to be handled carefully because it's the original |
| 2713 |
copy, which has to be moved up. The remainder can be handled by code |
copy, which has to be moved up. The remainder can be handled by code |
| 2714 |
that is common with the non-zero minimum case below. We just have to |
that is common with the non-zero minimum case below. We have to |
| 2715 |
adjust the value of repeat_max, since one less copy is required. */ |
adjust the value of repeat_max, since one less copy is required. Once |
| 2716 |
|
again, we may have to adjust any OP_RECURSE calls inside the group. */ |
| 2717 |
|
|
| 2718 |
else |
else |
| 2719 |
{ |
{ |
| 2720 |
int offset; |
int offset; |
| 2721 |
|
*code = OP_END; |
| 2722 |
|
adjust_recurse(previous, 2 + LINK_SIZE, utf8, cd); |
| 2723 |
memmove(previous + 2 + LINK_SIZE, previous, len); |
memmove(previous + 2 + LINK_SIZE, previous, len); |
| 2724 |
code += 2 + LINK_SIZE; |
code += 2 + LINK_SIZE; |
| 2725 |
*previous++ = OP_BRAZERO + repeat_type; |
*previous++ = OP_BRAZERO + repeat_type; |
| 3408 |
if (c < 0) { ptr = tempptr; break; } |
if (c < 0) { ptr = tempptr; break; } |
| 3409 |
|
|
| 3410 |
/* If a character is > 127 in UTF-8 mode, we have to turn it into |
/* If a character is > 127 in UTF-8 mode, we have to turn it into |
| 3411 |
two or more characters in the UTF-8 encoding. */ |
two or more bytes in the UTF-8 encoding. */ |
| 3412 |
|
|
| 3413 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 3414 |
if (utf8 && c > 127) |
if (utf8 && c > 127) |
| 4021 |
|
|
| 4022 |
for (p = string; length-- > 0; p++) |
for (p = string; length-- > 0; p++) |
| 4023 |
{ |
{ |
| 4024 |
int ab; |
register int ab; |
| 4025 |
if (*p < 128) continue; |
register int c = *p; |
| 4026 |
if ((*p & 0xc0) != 0xc0) return p - string; |
if (c < 128) continue; |
| 4027 |
ab = utf8_table4[*p & 0x3f]; /* Number of additional bytes */ |
if ((c & 0xc0) != 0xc0) return p - string; |
| 4028 |
|
ab = utf8_table4[c & 0x3f]; /* Number of additional bytes */ |
| 4029 |
if (length < ab) return p - string; |
if (length < ab) return p - string; |
| 4030 |
while (ab-- > 0) |
length -= ab; |
| 4031 |
|
|
| 4032 |
|
/* Check top bits in the second byte */ |
| 4033 |
|
if ((*(++p) & 0xc0) != 0x80) return p - string; |
| 4034 |
|
|
| 4035 |
|
/* Check for overlong sequences for each different length */ |
| 4036 |
|
switch (ab) |
| 4037 |
|
{ |
| 4038 |
|
/* Check for xx00 000x */ |
| 4039 |
|
case 1: |
| 4040 |
|
if ((c & 0x3e) == 0) return p - string; |
| 4041 |
|
continue; /* We know there aren't any more bytes to check */ |
| 4042 |
|
|
| 4043 |
|
/* Check for 1110 0000, xx0x xxxx */ |
| 4044 |
|
case 2: |
| 4045 |
|
if (c == 0xe0 && (*p & 0x20) == 0) return p - string; |
| 4046 |
|
break; |
| 4047 |
|
|
| 4048 |
|
/* Check for 1111 0000, xx00 xxxx */ |
| 4049 |
|
case 3: |
| 4050 |
|
if (c == 0xf0 && (*p & 0x30) == 0) return p - string; |
| 4051 |
|
break; |
| 4052 |
|
|
| 4053 |
|
/* Check for 1111 1000, xx00 0xxx */ |
| 4054 |
|
case 4: |
| 4055 |
|
if (c == 0xf8 && (*p & 0x38) == 0) return p - string; |
| 4056 |
|
break; |
| 4057 |
|
|
| 4058 |
|
/* Check for leading 0xfe or 0xff, and then for 1111 1100, xx00 00xx */ |
| 4059 |
|
case 5: |
| 4060 |
|
if (c == 0xfe || c == 0xff || |
| 4061 |
|
(c == 0xfc && (*p & 0x3c) == 0)) return p - string; |
| 4062 |
|
break; |
| 4063 |
|
} |
| 4064 |
|
|
| 4065 |
|
/* Check for valid bytes after the 2nd, if any; all must start 10 */ |
| 4066 |
|
while (--ab > 0) |
| 4067 |
{ |
{ |
| 4068 |
if ((*(++p) & 0xc0) != 0x80) return p - string; |
if ((*(++p) & 0xc0) != 0x80) return p - string; |
|
length--; |
|
| 4069 |
} |
} |
| 4070 |
} |
} |
| 4071 |
|
|
| 4093 |
with errorptr and erroroffset set |
with errorptr and erroroffset set |
| 4094 |
*/ |
*/ |
| 4095 |
|
|
| 4096 |
pcre * |
EXPORT pcre * |
| 4097 |
pcre_compile(const char *pattern, int options, const char **errorptr, |
pcre_compile(const char *pattern, int options, const char **errorptr, |
| 4098 |
int *erroroffset, const unsigned char *tables) |
int *erroroffset, const unsigned char *tables) |
| 4099 |
{ |
{ |
| 4502 |
{ |
{ |
| 4503 |
length += 33; |
length += 33; |
| 4504 |
|
|
| 4505 |
/* A repeat needs either 1 or 5 bytes. */ |
/* A repeat needs either 1 or 5 bytes. If it is a possessive quantifier, |
| 4506 |
|
we also need extra for wrapping the whole thing in a sub-pattern. */ |
| 4507 |
|
|
| 4508 |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
if (*ptr != 0 && ptr[1] == '{' && is_counted_repeat(ptr+2)) |
| 4509 |
{ |
{ |
| 4513 |
(min == 1 && max == -1)) |
(min == 1 && max == -1)) |
| 4514 |
length++; |
length++; |
| 4515 |
else length += 5; |
else length += 5; |
| 4516 |
if (ptr[1] == '?') ptr++; |
if (ptr[1] == '+') |
| 4517 |
|
{ |
| 4518 |
|
ptr++; |
| 4519 |
|
length += 2 + 2*LINK_SIZE; |
| 4520 |
|
} |
| 4521 |
|
else if (ptr[1] == '?') ptr++; |
| 4522 |
} |
} |
| 4523 |
} |
} |
| 4524 |
continue; |
continue; |
| 5159 |
if ((re->options & PCRE_FIRSTSET) != 0) |
if ((re->options & PCRE_FIRSTSET) != 0) |
| 5160 |
{ |
{ |
| 5161 |
int ch = re->first_byte & 255; |
int ch = re->first_byte & 255; |
| 5162 |
char *caseless = ((re->first_byte & REQ_CASELESS) == 0)? "" : " (caseless)"; |
const char *caseless = ((re->first_byte & REQ_CASELESS) == 0)? "" : " (caseless)"; |
| 5163 |
if (isprint(ch)) printf("First char = %c%s\n", ch, caseless); |
if (isprint(ch)) printf("First char = %c%s\n", ch, caseless); |
| 5164 |
else printf("First char = \\x%02x%s\n", ch, caseless); |
else printf("First char = \\x%02x%s\n", ch, caseless); |
| 5165 |
} |
} |
| 5167 |
if ((re->options & PCRE_REQCHSET) != 0) |
if ((re->options & PCRE_REQCHSET) != 0) |
| 5168 |
{ |
{ |
| 5169 |
int ch = re->req_byte & 255; |
int ch = re->req_byte & 255; |
| 5170 |
char *caseless = ((re->req_byte & REQ_CASELESS) == 0)? "" : " (caseless)"; |
const char *caseless = ((re->req_byte & REQ_CASELESS) == 0)? "" : " (caseless)"; |
| 5171 |
if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless); |
if (isprint(ch)) printf("Req char = %c%s\n", ch, caseless); |
| 5172 |
else printf("Req char = \\x%02x%s\n", ch, caseless); |
else printf("Req char = \\x%02x%s\n", ch, caseless); |
| 5173 |
} |
} |
| 5301 |
#endif |
#endif |
| 5302 |
|
|
| 5303 |
|
|
| 5304 |
|
/*************************************************************************** |
| 5305 |
|
**************************************************************************** |
| 5306 |
|
RECURSION IN THE match() FUNCTION |
| 5307 |
|
|
| 5308 |
|
The match() function is highly recursive. Some regular expressions can cause |
| 5309 |
|
it to recurse thousands of times. I was writing for Unix, so I just let it |
| 5310 |
|
call itself recursively. This uses the stack for saving everything that has |
| 5311 |
|
to be saved for a recursive call. On Unix, the stack can be large, and this |
| 5312 |
|
works fine. |
| 5313 |
|
|
| 5314 |
|
It turns out that on non-Unix systems there are problems with programs that |
| 5315 |
|
use a lot of stack. (This despite the fact that every last chip has oodles |
| 5316 |
|
of memory these days, and techniques for extending the stack have been known |
| 5317 |
|
for decades.) So.... |
| 5318 |
|
|
| 5319 |
|
There is a fudge, triggered by defining NO_RECURSE, which avoids recursive |
| 5320 |
|
calls by keeping local variables that need to be preserved in blocks of memory |
| 5321 |
|
obtained from malloc instead of on the stack. Macros are used to |
| 5322 |
|
achieve this so that the actual code doesn't look very different to what it |
| 5323 |
|
always used to. |
| 5324 |
|
**************************************************************************** |
| 5325 |
|
***************************************************************************/ |
| 5326 |
|
|
| 5327 |
|
|
| 5328 |
|
/* These versions of the macros use the stack, as normal */ |
| 5329 |
|
|
| 5330 |
|
#ifndef NO_RECURSE |
| 5331 |
|
#define REGISTER register |
| 5332 |
|
#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg) rx = match(ra,rb,rc,rd,re,rf,rg) |
| 5333 |
|
#define RRETURN(ra) return ra |
| 5334 |
|
#else |
| 5335 |
|
|
| 5336 |
|
|
| 5337 |
|
/* These versions of the macros manage a private stack on the heap. Note |
| 5338 |
|
that the rd argument of RMATCH isn't actually used. It's the md argument of |
| 5339 |
|
match(), which never actually changes. */ |
| 5340 |
|
|
| 5341 |
|
#define REGISTER |
| 5342 |
|
|
| 5343 |
|
#define RMATCH(rx,ra,rb,rc,rd,re,rf,rg)\ |
| 5344 |
|
{\ |
| 5345 |
|
heapframe *newframe = (pcre_stack_malloc)(sizeof(heapframe));\ |
| 5346 |
|
if (setjmp(frame->Xwhere) == 0)\ |
| 5347 |
|
{\ |
| 5348 |
|
newframe->Xeptr = ra;\ |
| 5349 |
|
newframe->Xecode = rb;\ |
| 5350 |
|
newframe->Xoffset_top = rc;\ |
| 5351 |
|
newframe->Xims = re;\ |
| 5352 |
|
newframe->Xeptrb = rf;\ |
| 5353 |
|
newframe->Xflags = rg;\ |
| 5354 |
|
newframe->Xprevframe = frame;\ |
| 5355 |
|
frame = newframe;\ |
| 5356 |
|
DPRINTF(("restarting from line %d\n", __LINE__));\ |
| 5357 |
|
goto HEAP_RECURSE;\ |
| 5358 |
|
}\ |
| 5359 |
|
else\ |
| 5360 |
|
{\ |
| 5361 |
|
DPRINTF(("longjumped back to line %d\n", __LINE__));\ |
| 5362 |
|
frame = md->thisframe;\ |
| 5363 |
|
rx = frame->Xresult;\ |
| 5364 |
|
}\ |
| 5365 |
|
} |
| 5366 |
|
|
| 5367 |
|
#define RRETURN(ra)\ |
| 5368 |
|
{\ |
| 5369 |
|
heapframe *newframe = frame;\ |
| 5370 |
|
frame = newframe->Xprevframe;\ |
| 5371 |
|
(pcre_stack_free)(newframe);\ |
| 5372 |
|
if (frame != NULL)\ |
| 5373 |
|
{\ |
| 5374 |
|
frame->Xresult = ra;\ |
| 5375 |
|
md->thisframe = frame;\ |
| 5376 |
|
longjmp(frame->Xwhere, 1);\ |
| 5377 |
|
}\ |
| 5378 |
|
return ra;\ |
| 5379 |
|
} |
| 5380 |
|
|
| 5381 |
|
|
| 5382 |
|
/* Structure for remembering the local variables in a private frame */ |
| 5383 |
|
|
| 5384 |
|
typedef struct heapframe { |
| 5385 |
|
struct heapframe *Xprevframe; |
| 5386 |
|
|
| 5387 |
|
/* Function arguments that may change */ |
| 5388 |
|
|
| 5389 |
|
const uschar *Xeptr; |
| 5390 |
|
const uschar *Xecode; |
| 5391 |
|
int Xoffset_top; |
| 5392 |
|
long int Xims; |
| 5393 |
|
eptrblock *Xeptrb; |
| 5394 |
|
int Xflags; |
| 5395 |
|
|
| 5396 |
|
/* Function local variables */ |
| 5397 |
|
|
| 5398 |
|
const uschar *Xcallpat; |
| 5399 |
|
const uschar *Xcharptr; |
| 5400 |
|
const uschar *Xdata; |
| 5401 |
|
const uschar *Xlastptr; |
| 5402 |
|
const uschar *Xnext; |
| 5403 |
|
const uschar *Xpp; |
| 5404 |
|
const uschar *Xprev; |
| 5405 |
|
const uschar *Xsaved_eptr; |
| 5406 |
|
|
| 5407 |
|
recursion_info Xnew_recursive; |
| 5408 |
|
|
| 5409 |
|
BOOL Xcur_is_word; |
| 5410 |
|
BOOL Xcondition; |
| 5411 |
|
BOOL Xminimize; |
| 5412 |
|
BOOL Xprev_is_word; |
| 5413 |
|
|
| 5414 |
|
unsigned long int Xoriginal_ims; |
| 5415 |
|
|
| 5416 |
|
int Xctype; |
| 5417 |
|
int Xfc; |
| 5418 |
|
int Xfi; |
| 5419 |
|
int Xlength; |
| 5420 |
|
int Xmax; |
| 5421 |
|
int Xmin; |
| 5422 |
|
int Xnumber; |
| 5423 |
|
int Xoffset; |
| 5424 |
|
int Xop; |
| 5425 |
|
int Xsave_capture_last; |
| 5426 |
|
int Xsave_offset1, Xsave_offset2, Xsave_offset3; |
| 5427 |
|
int Xstacksave[REC_STACK_SAVE_MAX]; |
| 5428 |
|
|
| 5429 |
|
eptrblock Xnewptrb; |
| 5430 |
|
|
| 5431 |
|
/* Place to pass back result, and where to jump back to */ |
| 5432 |
|
|
| 5433 |
|
int Xresult; |
| 5434 |
|
jmp_buf Xwhere; |
| 5435 |
|
|
| 5436 |
|
} heapframe; |
| 5437 |
|
|
| 5438 |
|
#endif |
| 5439 |
|
|
| 5440 |
|
|
| 5441 |
|
/*************************************************************************** |
| 5442 |
|
***************************************************************************/ |
| 5443 |
|
|
| 5444 |
|
|
| 5445 |
|
|
| 5446 |
/************************************************* |
/************************************************* |
| 5478 |
*/ |
*/ |
| 5479 |
|
|
| 5480 |
static int |
static int |
| 5481 |
match(register const uschar *eptr, register const uschar *ecode, |
match(REGISTER const uschar *eptr, REGISTER const uschar *ecode, |
| 5482 |
int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, |
int offset_top, match_data *md, unsigned long int ims, eptrblock *eptrb, |
| 5483 |
int flags) |
int flags) |
| 5484 |
{ |
{ |
| 5485 |
unsigned long int original_ims = ims; /* Save for resetting on ')' */ |
/* These variables do not need to be preserved over recursion in this function, |
| 5486 |
register int rrc; |
so they can be ordinary variables in all cases. Mark them with "register" |
| 5487 |
|
because they are used a lot in loops. */ |
| 5488 |
|
|
| 5489 |
|
register int rrc; /* Returns from recursive calls */ |
| 5490 |
|
register int i; /* Used for loops not involving calls to RMATCH() */ |
| 5491 |
|
register int c; /* Character values not kept over RMATCH() calls */ |
| 5492 |
|
|
| 5493 |
|
/* When recursion is not being used, all "local" variables that have to be |
| 5494 |
|
preserved over calls to RMATCH() are part of a "frame" which is obtained from |
| 5495 |
|
heap storage. Set up the top-level frame here; others are obtained from the |
| 5496 |
|
heap whenever RMATCH() does a "recursion". See the macro definitions above. */ |
| 5497 |
|
|
| 5498 |
|
#ifdef NO_RECURSE |
| 5499 |
|
heapframe *frame = (pcre_stack_malloc)(sizeof(heapframe)); |
| 5500 |
|
frame->Xprevframe = NULL; /* Marks the top level */ |
| 5501 |
|
|
| 5502 |
|
/* Copy in the original argument variables */ |
| 5503 |
|
|
| 5504 |
|
frame->Xeptr = eptr; |
| 5505 |
|
frame->Xecode = ecode; |
| 5506 |
|
frame->Xoffset_top = offset_top; |
| 5507 |
|
frame->Xims = ims; |
| 5508 |
|
frame->Xeptrb = eptrb; |
| 5509 |
|
frame->Xflags = flags; |
| 5510 |
|
|
| 5511 |
|
/* This is where control jumps back to in order to effect "recursion" */ |
| 5512 |
|
|
| 5513 |
|
HEAP_RECURSE: |
| 5514 |
|
|
| 5515 |
|
/* Macros make the argument variables come from the current frame */ |
| 5516 |
|
|
| 5517 |
|
#define eptr frame->Xeptr |
| 5518 |
|
#define ecode frame->Xecode |
| 5519 |
|
#define offset_top frame->Xoffset_top |
| 5520 |
|
#define ims frame->Xims |
| 5521 |
|
#define eptrb frame->Xeptrb |
| 5522 |
|
#define flags frame->Xflags |
| 5523 |
|
|
| 5524 |
|
/* Ditto for the local variables */ |
| 5525 |
|
|
| 5526 |
|
#define callpat frame->Xcallpat |
| 5527 |
|
#define charptr frame->Xcharptr |
| 5528 |
|
#define data frame->Xdata |
| 5529 |
|
#define lastptr frame->Xlastptr |
| 5530 |
|
#define next frame->Xnext |
| 5531 |
|
#define pp frame->Xpp |
| 5532 |
|
#define prev frame->Xprev |
| 5533 |
|
#define saved_eptr frame->Xsaved_eptr |
| 5534 |
|
|
| 5535 |
|
#define new_recursive frame->Xnew_recursive |
| 5536 |
|
|
| 5537 |
|
#define cur_is_word frame->Xcur_is_word |
| 5538 |
|
#define condition frame->Xcondition |
| 5539 |
|
#define minimize frame->Xminimize |
| 5540 |
|
#define prev_is_word frame->Xprev_is_word |
| 5541 |
|
|
| 5542 |
|
#define original_ims frame->Xoriginal_ims |
| 5543 |
|
|
| 5544 |
|
#define ctype frame->Xctype |
| 5545 |
|
#define fc frame->Xfc |
| 5546 |
|
#define fi frame->Xfi |
| 5547 |
|
#define length frame->Xlength |
| 5548 |
|
#define max frame->Xmax |
| 5549 |
|
#define min frame->Xmin |
| 5550 |
|
#define number frame->Xnumber |
| 5551 |
|
#define offset frame->Xoffset |
| 5552 |
|
#define op frame->Xop |
| 5553 |
|
#define save_capture_last frame->Xsave_capture_last |
| 5554 |
|
#define save_offset1 frame->Xsave_offset1 |
| 5555 |
|
#define save_offset2 frame->Xsave_offset2 |
| 5556 |
|
#define save_offset3 frame->Xsave_offset3 |
| 5557 |
|
#define stacksave frame->Xstacksave |
| 5558 |
|
|
| 5559 |
|
#define newptrb frame->Xnewptrb |
| 5560 |
|
|
| 5561 |
|
/* When recursion is being used, local variables are allocated on the stack and |
| 5562 |
|
get preserved during recursion in the normal way. In this environment, fi and |
| 5563 |
|
i, and fc and c, can be the same variables. */ |
| 5564 |
|
|
| 5565 |
|
#else |
| 5566 |
|
#define fi i |
| 5567 |
|
#define fc c |
| 5568 |
|
|
| 5569 |
|
const uschar *callpat; /* Many of these variables are used only in */ |
| 5570 |
|
const uschar *charptr; /* small blocks of the code. My normal */ |
| 5571 |
|
const uschar *data; /* style of coding would have declared */ |
| 5572 |
|
const uschar *lastptr; /* them within each of those blocks. */ |
| 5573 |
|
const uschar *next; /* However, in order to accommodate the */ |
| 5574 |
|
const uschar *pp; /* version of this code that uses an */ |
| 5575 |
|
const uschar *prev; /* external "stack" implemented on the */ |
| 5576 |
|
const uschar *saved_eptr; /* heap, it is easier to declare them */ |
| 5577 |
|
/* all here, so the declarations can */ |
| 5578 |
|
recursion_info new_recursive; /* be cut out in a block. The only */ |
| 5579 |
|
/* declarations within blocks below are */ |
| 5580 |
|
BOOL cur_is_word; /* for variables that do not have to */ |
| 5581 |
|
BOOL condition; /* be preserved over a recursive call */ |
| 5582 |
|
BOOL minimize; /* to RMATCH(). */ |
| 5583 |
|
BOOL prev_is_word; |
| 5584 |
|
|
| 5585 |
|
unsigned long int original_ims; |
| 5586 |
|
|
| 5587 |
|
int ctype; |
| 5588 |
|
int length; |
| 5589 |
|
int max; |
| 5590 |
|
int min; |
| 5591 |
|
int number; |
| 5592 |
|
int offset; |
| 5593 |
|
int op; |
| 5594 |
|
int save_capture_last; |
| 5595 |
|
int save_offset1, save_offset2, save_offset3; |
| 5596 |
|
int stacksave[REC_STACK_SAVE_MAX]; |
| 5597 |
|
|
| 5598 |
eptrblock newptrb; |
eptrblock newptrb; |
| 5599 |
|
#endif |
| 5600 |
|
|
| 5601 |
|
|
| 5602 |
|
/* OK, now we can get on with the real code of the function. Recursion is |
| 5603 |
|
specified by the macros RMATCH and RRETURN. When NO_RECURSE is *not* defined, |
| 5604 |
|
these just turn into a recursive call to match() and a "return", respectively. |
| 5605 |
|
However, RMATCH isn't like a function call because it's quite a complicated |
| 5606 |
|
macro. It has to be used in one particular way. This shouldn't, however, impact |
| 5607 |
|
performance when true recursion is being used. */ |
| 5608 |
|
|
| 5609 |
|
if (md->match_call_count++ >= md->match_limit) RRETURN(PCRE_ERROR_MATCHLIMIT); |
| 5610 |
|
|
| 5611 |
if (md->match_call_count++ >= md->match_limit) return PCRE_ERROR_MATCHLIMIT; |
original_ims = ims; /* Save for resetting on ')' */ |
| 5612 |
|
|
| 5613 |
/* At the start of a bracketed group, add the current subject pointer to the |
/* At the start of a bracketed group, add the current subject pointer to the |
| 5614 |
stack of such pointers, to be re-instated at the end of the group when we hit |
stack of such pointers, to be re-instated at the end of the group when we hit |
| 5615 |
the closing ket. When match() is called in other circumstances, we don't add to |
the closing ket. When match() is called in other circumstances, we don't add to |
| 5616 |
the stack. */ |
this stack. */ |
| 5617 |
|
|
| 5618 |
if ((flags & match_isgroup) != 0) |
if ((flags & match_isgroup) != 0) |
| 5619 |
{ |
{ |
| 5620 |
newptrb.prev = eptrb; |
newptrb.epb_prev = eptrb; |
| 5621 |
newptrb.saved_eptr = eptr; |
newptrb.epb_saved_eptr = eptr; |
| 5622 |
eptrb = &newptrb; |
eptrb = &newptrb; |
| 5623 |
} |
} |
| 5624 |
|
|
| 5626 |
|
|
| 5627 |
for (;;) |
for (;;) |
| 5628 |
{ |
{ |
| 5629 |
int op = (int)*ecode; |
op = *ecode; |
| 5630 |
int min, max, ctype; |
minimize = FALSE; |
|
register int i; |
|
|
register int c; |
|
|
BOOL minimize = FALSE; |
|
| 5631 |
|
|
| 5632 |
/* Opening capturing bracket. If there is space in the offset vector, save |
/* Opening capturing bracket. If there is space in the offset vector, save |
| 5633 |
the current subject position in the working slot at the top of the vector. We |
the current subject position in the working slot at the top of the vector. We |
| 5645 |
|
|
| 5646 |
if (op > OP_BRA) |
if (op > OP_BRA) |
| 5647 |
{ |
{ |
| 5648 |
int offset; |
number = op - OP_BRA; |
|
int number = op - OP_BRA; |
|
| 5649 |
|
|
| 5650 |
/* For extended extraction brackets (large number), we have to fish out the |
/* For extended extraction brackets (large number), we have to fish out the |
| 5651 |
number from a dummy opcode at the start. */ |
number from a dummy opcode at the start. */ |
| 5662 |
|
|
| 5663 |
if (offset < md->offset_max) |
if (offset < md->offset_max) |
| 5664 |
{ |
{ |
| 5665 |
int save_offset1 = md->offset_vector[offset]; |
save_offset1 = md->offset_vector[offset]; |
| 5666 |
int save_offset2 = md->offset_vector[offset+1]; |
save_offset2 = md->offset_vector[offset+1]; |
| 5667 |
int save_offset3 = md->offset_vector[md->offset_end - number]; |
save_offset3 = md->offset_vector[md->offset_end - number]; |
| 5668 |
int save_capture_last = md->capture_last; |
save_capture_last = md->capture_last; |
| 5669 |
|
|
| 5670 |
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); |
DPRINTF(("saving %d %d %d\n", save_offset1, save_offset2, save_offset3)); |
| 5671 |
md->offset_vector[md->offset_end - number] = eptr - md->start_subject; |
md->offset_vector[md->offset_end - number] = eptr - md->start_subject; |
| 5672 |
|
|
| 5673 |
do |
do |
| 5674 |
{ |
{ |
| 5675 |
if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, |
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, |
| 5676 |
eptrb, match_isgroup)) != MATCH_NOMATCH) return rrc; |
match_isgroup); |
| 5677 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 5678 |
md->capture_last = save_capture_last; |
md->capture_last = save_capture_last; |
| 5679 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 5680 |
} |
} |
| 5686 |
md->offset_vector[offset+1] = save_offset2; |
md->offset_vector[offset+1] = save_offset2; |
| 5687 |
md->offset_vector[md->offset_end - number] = save_offset3; |
md->offset_vector[md->offset_end - number] = save_offset3; |
| 5688 |
|
|
| 5689 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 5690 |
} |
} |
| 5691 |
|
|
| 5692 |
/* Insufficient room for saving captured contents */ |
/* Insufficient room for saving captured contents */ |
| 5702 |
DPRINTF(("start bracket 0\n")); |
DPRINTF(("start bracket 0\n")); |
| 5703 |
do |
do |
| 5704 |
{ |
{ |
| 5705 |
if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, |
| 5706 |
match_isgroup)) != MATCH_NOMATCH) return rrc; |
match_isgroup); |
| 5707 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 5708 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 5709 |
} |
} |
| 5710 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 5711 |
DPRINTF(("bracket 0 failed\n")); |
DPRINTF(("bracket 0 failed\n")); |
| 5712 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 5713 |
|
|
| 5714 |
/* Conditional group: compilation checked that there are no more than |
/* Conditional group: compilation checked that there are no more than |
| 5715 |
two branches. If the condition is false, skipping the first branch takes us |
two branches. If the condition is false, skipping the first branch takes us |
| 5719 |
case OP_COND: |
case OP_COND: |
| 5720 |
if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */ |
if (ecode[LINK_SIZE+1] == OP_CREF) /* Condition extract or recurse test */ |
| 5721 |
{ |
{ |
| 5722 |
int offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */ |
offset = GET2(ecode, LINK_SIZE+2) << 1; /* Doubled ref number */ |
| 5723 |
BOOL condition = (offset == CREF_RECURSE * 2)? |
condition = (offset == CREF_RECURSE * 2)? |
| 5724 |
(md->recursive != NULL) : |
(md->recursive != NULL) : |
| 5725 |
(offset < offset_top && md->offset_vector[offset] >= 0); |
(offset < offset_top && md->offset_vector[offset] >= 0); |
| 5726 |
return match(eptr, ecode + (condition? |
RMATCH(rrc, eptr, ecode + (condition? |
| 5727 |
(LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))), |
(LINK_SIZE + 4) : (LINK_SIZE + 1 + GET(ecode, 1))), |
| 5728 |
offset_top, md, ims, eptrb, match_isgroup); |
offset_top, md, ims, eptrb, match_isgroup); |
| 5729 |
|
RRETURN(rrc); |
| 5730 |
} |
} |
| 5731 |
|
|
| 5732 |
/* The condition is an assertion. Call match() to evaluate it - setting |
/* The condition is an assertion. Call match() to evaluate it - setting |
| 5734 |
|
|
| 5735 |
else |
else |
| 5736 |
{ |
{ |
| 5737 |
if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, |
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, |
| 5738 |
match_condassert | match_isgroup)) == MATCH_MATCH) |
match_condassert | match_isgroup); |
| 5739 |
|
if (rrc == MATCH_MATCH) |
| 5740 |
{ |
{ |
| 5741 |
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2); |
ecode += 1 + LINK_SIZE + GET(ecode, LINK_SIZE+2); |
| 5742 |
while (*ecode == OP_ALT) ecode += GET(ecode, 1); |
while (*ecode == OP_ALT) ecode += GET(ecode, 1); |
| 5743 |
} |
} |
| 5744 |
else if (rrc != MATCH_NOMATCH) return rrc; |
else if (rrc != MATCH_NOMATCH) |
| 5745 |
|
{ |
| 5746 |
|
RRETURN(rrc); /* Need braces because of following else */ |
| 5747 |
|
} |
| 5748 |
else ecode += GET(ecode, 1); |
else ecode += GET(ecode, 1); |
| 5749 |
return match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, |
| 5750 |
match_isgroup); |
match_isgroup); |
| 5751 |
|
RRETURN(rrc); |
| 5752 |
} |
} |
| 5753 |
/* Control never reaches here */ |
/* Control never reaches here */ |
| 5754 |
|
|
| 5768 |
{ |
{ |
| 5769 |
recursion_info *rec = md->recursive; |
recursion_info *rec = md->recursive; |
| 5770 |
DPRINTF(("Hit the end in a (?0) recursion\n")); |
DPRINTF(("Hit the end in a (?0) recursion\n")); |
| 5771 |
md->recursive = rec->prev; |
md->recursive = rec->prevrec; |
| 5772 |
memmove(md->offset_vector, rec->offset_save, |
memmove(md->offset_vector, rec->offset_save, |
| 5773 |
rec->saved_max * sizeof(int)); |
rec->saved_max * sizeof(int)); |
| 5774 |
md->start_match = rec->save_start; |
md->start_match = rec->save_start; |
| 5780 |
/* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty |
/* Otherwise, if PCRE_NOTEMPTY is set, fail if we have matched an empty |
| 5781 |
string - backtracking will then try other alternatives, if any. */ |
string - backtracking will then try other alternatives, if any. */ |
| 5782 |
|
|
| 5783 |
if (md->notempty && eptr == md->start_match) return MATCH_NOMATCH; |
if (md->notempty && eptr == md->start_match) RRETURN(MATCH_NOMATCH); |
| 5784 |
md->end_match_ptr = eptr; /* Record where we ended */ |
md->end_match_ptr = eptr; /* Record where we ended */ |
| 5785 |
md->end_offset_top = offset_top; /* and how many extracts were taken */ |
md->end_offset_top = offset_top; /* and how many extracts were taken */ |
| 5786 |
return MATCH_MATCH; |
RRETURN(MATCH_MATCH); |
| 5787 |
|
|
| 5788 |
/* Change option settings */ |
/* Change option settings */ |
| 5789 |
|
|
| 5803 |
case OP_ASSERTBACK: |
case OP_ASSERTBACK: |
| 5804 |
do |
do |
| 5805 |
{ |
{ |
| 5806 |
if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, |
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, |
| 5807 |
match_isgroup)) == MATCH_MATCH) break; |
match_isgroup); |
| 5808 |
if (rrc != MATCH_NOMATCH) return rrc; |
if (rrc == MATCH_MATCH) break; |
| 5809 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 5810 |
ecode += GET(ecode, 1); |
ecode += GET(ecode, 1); |
| 5811 |
} |
} |
| 5812 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 5813 |
if (*ecode == OP_KET) return MATCH_NOMATCH; |
if (*ecode == OP_KET) RRETURN(MATCH_NOMATCH); |
| 5814 |
|
|
| 5815 |
/* If checking an assertion for a condition, return MATCH_MATCH. */ |
/* If checking an assertion for a condition, return MATCH_MATCH. */ |
| 5816 |
|
|
| 5817 |
if ((flags & match_condassert) != 0) return MATCH_MATCH; |
if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH); |
| 5818 |
|
|
| 5819 |
/* Continue from after the assertion, updating the offsets high water |
/* Continue from after the assertion, updating the offsets high water |
| 5820 |
mark, since extracts may have been taken during the assertion. */ |
mark, since extracts may have been taken during the assertion. */ |
| 5830 |
case OP_ASSERTBACK_NOT: |
case OP_ASSERTBACK_NOT: |
| 5831 |
do |
do |
| 5832 |
{ |
{ |
| 5833 |
if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, |
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, NULL, |
| 5834 |
match_isgroup)) == MATCH_MATCH) return MATCH_NOMATCH; |
match_isgroup); |
| 5835 |
if (rrc != MATCH_NOMATCH) return rrc; |
if (rrc == MATCH_MATCH) RRETURN(MATCH_NOMATCH); |
| 5836 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 5837 |
ecode += GET(ecode,1); |
ecode += GET(ecode,1); |
| 5838 |
} |
} |
| 5839 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 5840 |
|
|
| 5841 |
if ((flags & match_condassert) != 0) return MATCH_MATCH; |
if ((flags & match_condassert) != 0) RRETURN(MATCH_MATCH); |
| 5842 |
|
|
| 5843 |
ecode += 1 + LINK_SIZE; |
ecode += 1 + LINK_SIZE; |
| 5844 |
continue; |
continue; |
| 5856 |
for (i = 0; i < c; i++) |
for (i = 0; i < c; i++) |
| 5857 |
{ |
{ |
| 5858 |
eptr--; |
eptr--; |
| 5859 |
if (eptr < md->start_subject) return MATCH_NOMATCH; |
if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); |
| 5860 |
BACKCHAR(eptr) |
BACKCHAR(eptr) |
| 5861 |
} |
} |
| 5862 |
} |
} |
| 5867 |
|
|
| 5868 |
{ |
{ |
| 5869 |
eptr -= GET(ecode,1); |
eptr -= GET(ecode,1); |
| 5870 |
if (eptr < md->start_subject) return MATCH_NOMATCH; |
if (eptr < md->start_subject) RRETURN(MATCH_NOMATCH); |
| 5871 |
} |
} |
| 5872 |
|
|
| 5873 |
/* Skip to next op code */ |
/* Skip to next op code */ |
| 5893 |
cb.capture_top = offset_top/2; |
cb.capture_top = offset_top/2; |
| 5894 |
cb.capture_last = md->capture_last; |
cb.capture_last = md->capture_last; |
| 5895 |
cb.callout_data = md->callout_data; |
cb.callout_data = md->callout_data; |
| 5896 |
if ((rrc = (*pcre_callout)(&cb)) > 0) return MATCH_NOMATCH; |
if ((rrc = (*pcre_callout)(&cb)) > 0) RRETURN(MATCH_NOMATCH); |
| 5897 |
if (rrc < 0) return rrc; |
if (rrc < 0) RRETURN(rrc); |
| 5898 |
} |
} |
| 5899 |
ecode += 2; |
ecode += 2; |
| 5900 |
break; |
break; |
| 5901 |
|
|
| 5902 |
/* Recursion either matches the current regex, or some subexpression. The |
/* Recursion either matches the current regex, or some subexpression. The |
| 5903 |
offset data is the offset to the starting bracket from the start of the |
offset data is the offset to the starting bracket from the start of the |
| 5904 |
whole pattern. However, it is possible that a BRAZERO was inserted before |
whole pattern. (This is so that it works from duplicated subpatterns.) |
|
this bracket after we took the offset - we just skip it if encountered. |
|
| 5905 |
|
|
| 5906 |
If there are any capturing brackets started but not finished, we have to |
If there are any capturing brackets started but not finished, we have to |
| 5907 |
save their starting points and reinstate them after the recursion. However, |
save their starting points and reinstate them after the recursion. However, |
| 5920 |
|
|
| 5921 |
case OP_RECURSE: |
case OP_RECURSE: |
| 5922 |
{ |
{ |
| 5923 |
int stacksave[REC_STACK_SAVE_MAX]; |
callpat = md->start_code + GET(ecode, 1); |
|
recursion_info new_recursive; |
|
|
const uschar *callpat = md->start_code + GET(ecode, 1); |
|
|
|
|
|
if (*callpat == OP_BRAZERO) callpat++; |
|
|
|
|
| 5924 |
new_recursive.group_num = *callpat - OP_BRA; |
new_recursive.group_num = *callpat - OP_BRA; |
| 5925 |
|
|
| 5926 |
/* For extended extraction brackets (large number), we have to fish out |
/* For extended extraction brackets (large number), we have to fish out |
| 5931 |
|
|
| 5932 |
/* Add to "recursing stack" */ |
/* Add to "recursing stack" */ |
| 5933 |
|
|
| 5934 |
new_recursive.prev = md->recursive; |
new_recursive.prevrec = md->recursive; |
| 5935 |
md->recursive = &new_recursive; |
md->recursive = &new_recursive; |
| 5936 |
|
|
| 5937 |
/* Find where to continue from afterwards */ |
/* Find where to continue from afterwards */ |
| 5948 |
{ |
{ |
| 5949 |
new_recursive.offset_save = |
new_recursive.offset_save = |
| 5950 |
(int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); |
(int *)(pcre_malloc)(new_recursive.saved_max * sizeof(int)); |
| 5951 |
if (new_recursive.offset_save == NULL) return PCRE_ERROR_NOMEMORY; |
if (new_recursive.offset_save == NULL) RRETURN(PCRE_ERROR_NOMEMORY); |
| 5952 |
} |
} |
| 5953 |
|
|
| 5954 |
memcpy(new_recursive.offset_save, md->offset_vector, |
memcpy(new_recursive.offset_save, md->offset_vector, |
| 5962 |
DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); |
DPRINTF(("Recursing into group %d\n", new_recursive.group_num)); |
| 5963 |
do |
do |
| 5964 |
{ |
{ |
| 5965 |
if ((rrc = match(eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims, |
RMATCH(rrc, eptr, callpat + 1 + LINK_SIZE, offset_top, md, ims, |
| 5966 |
eptrb, match_isgroup)) == MATCH_MATCH) |
eptrb, match_isgroup); |
| 5967 |
|
if (rrc == MATCH_MATCH) |
| 5968 |
{ |
{ |
| 5969 |
md->recursive = new_recursive.prev; |
md->recursive = new_recursive.prevrec; |
| 5970 |
if (new_recursive.offset_save != stacksave) |
if (new_recursive.offset_save != stacksave) |
| 5971 |
(pcre_free)(new_recursive.offset_save); |
(pcre_free)(new_recursive.offset_save); |
| 5972 |
return MATCH_MATCH; |
RRETURN(MATCH_MATCH); |
| 5973 |
} |
} |
| 5974 |
else if (rrc != MATCH_NOMATCH) return rrc; |
else if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 5975 |
|
|
| 5976 |
md->recursive = &new_recursive; |
md->recursive = &new_recursive; |
| 5977 |
memcpy(md->offset_vector, new_recursive.offset_save, |
memcpy(md->offset_vector, new_recursive.offset_save, |
| 5981 |
while (*callpat == OP_ALT); |
while (*callpat == OP_ALT); |
| 5982 |
|
|
| 5983 |
DPRINTF(("Recursion didn't match\n")); |
DPRINTF(("Recursion didn't match\n")); |
| 5984 |
md->recursive = new_recursive.prev; |
md->recursive = new_recursive.prevrec; |
| 5985 |
if (new_recursive.offset_save != stacksave) |
if (new_recursive.offset_save != stacksave) |
| 5986 |
(pcre_free)(new_recursive.offset_save); |
(pcre_free)(new_recursive.offset_save); |
| 5987 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 5988 |
} |
} |
| 5989 |
/* Control never reaches here */ |
/* Control never reaches here */ |
| 5990 |
|
|
| 5997 |
|
|
| 5998 |
case OP_ONCE: |
case OP_ONCE: |
| 5999 |
{ |
{ |
| 6000 |
const uschar *prev = ecode; |
prev = ecode; |
| 6001 |
const uschar *saved_eptr = eptr; |
saved_eptr = eptr; |
| 6002 |
|
|
| 6003 |
do |
do |
| 6004 |
{ |
{ |
| 6005 |
if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, |
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, |
| 6006 |
eptrb, match_isgroup)) == MATCH_MATCH) break; |
eptrb, match_isgroup); |
| 6007 |
if (rrc != MATCH_NOMATCH) return rrc; |
if (rrc == MATCH_MATCH) break; |
| 6008 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6009 |
ecode += GET(ecode,1); |
ecode += GET(ecode,1); |
| 6010 |
} |
} |
| 6011 |
while (*ecode == OP_ALT); |
while (*ecode == OP_ALT); |
| 6012 |
|
|
| 6013 |
/* If hit the end of the group (which could be repeated), fail */ |
/* If hit the end of the group (which could be repeated), fail */ |
| 6014 |
|
|
| 6015 |
if (*ecode != OP_ONCE && *ecode != OP_ALT) return MATCH_NOMATCH; |
if (*ecode != OP_ONCE && *ecode != OP_ALT) RRETURN(MATCH_NOMATCH); |
| 6016 |
|
|
| 6017 |
/* Continue as from after the assertion, updating the offsets high water |
/* Continue as from after the assertion, updating the offsets high water |
| 6018 |
mark, since extracts may have been taken. */ |
mark, since extracts may have been taken. */ |
| 6047 |
|
|
| 6048 |
if (*ecode == OP_KETRMIN) |
if (*ecode == OP_KETRMIN) |
| 6049 |
{ |
{ |
| 6050 |
if ((rrc = match(eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, |
RMATCH(rrc, eptr, ecode + 1 + LINK_SIZE, offset_top, md, ims, eptrb, 0); |
| 6051 |
eptrb, 0)) != MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6052 |
if ((rrc = match(eptr, prev, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup); |
| 6053 |
match_isgroup)) != MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6054 |
} |
} |
| 6055 |
else /* OP_KETRMAX */ |
else /* OP_KETRMAX */ |
| 6056 |
{ |
{ |
| 6057 |
if ((rrc = match(eptr, prev, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup); |
| 6058 |
match_isgroup)) != MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6059 |
if ((rrc = match(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0); |
| 6060 |
0)) != MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6061 |
} |
} |
| 6062 |
} |
} |
| 6063 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6064 |
|
|
| 6065 |
/* An alternation is the end of a branch; scan along to find the end of the |
/* An alternation is the end of a branch; scan along to find the end of the |
| 6066 |
bracketed group and go to there. */ |
bracketed group and go to there. */ |
| 6077 |
|
|
| 6078 |
case OP_BRAZERO: |
case OP_BRAZERO: |
| 6079 |
{ |
{ |
| 6080 |
const uschar *next = ecode+1; |
next = ecode+1; |
| 6081 |
if ((rrc = match(eptr, next, offset_top, md, ims, eptrb, match_isgroup)) |
RMATCH(rrc, eptr, next, offset_top, md, ims, eptrb, match_isgroup); |
| 6082 |
!= MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6083 |
do next += GET(next,1); while (*next == OP_ALT); |
do next += GET(next,1); while (*next == OP_ALT); |
| 6084 |
ecode = next + 1+LINK_SIZE; |
ecode = next + 1+LINK_SIZE; |
| 6085 |
} |
} |
| 6087 |
|
|
| 6088 |
case OP_BRAMINZERO: |
case OP_BRAMINZERO: |
| 6089 |
{ |
{ |
| 6090 |
const uschar *next = ecode+1; |
next = ecode+1; |
| 6091 |
do next += GET(next,1); while (*next == OP_ALT); |
do next += GET(next,1); while (*next == OP_ALT); |
| 6092 |
if ((rrc = match(eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, next + 1+LINK_SIZE, offset_top, md, ims, eptrb, |
| 6093 |
match_isgroup)) != MATCH_NOMATCH) return rrc; |
match_isgroup); |
| 6094 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6095 |
ecode++; |
ecode++; |
| 6096 |
} |
} |
| 6097 |
break; |
break; |
| 6105 |
case OP_KETRMIN: |
case OP_KETRMIN: |
| 6106 |
case OP_KETRMAX: |
case OP_KETRMAX: |
| 6107 |
{ |
{ |
| 6108 |
const uschar *prev = ecode - GET(ecode, 1); |
prev = ecode - GET(ecode, 1); |
| 6109 |
const uschar *saved_eptr = eptrb->saved_eptr; |
saved_eptr = eptrb->epb_saved_eptr; |
| 6110 |
|
|
| 6111 |
|
/* Back up the stack of bracket start pointers. */ |
| 6112 |
|
|
| 6113 |
eptrb = eptrb->prev; /* Back up the stack of bracket start pointers */ |
eptrb = eptrb->epb_prev; |
| 6114 |
|
|
| 6115 |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || |
if (*prev == OP_ASSERT || *prev == OP_ASSERT_NOT || |
| 6116 |
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || |
*prev == OP_ASSERTBACK || *prev == OP_ASSERTBACK_NOT || |
| 6118 |
{ |
{ |
| 6119 |
md->end_match_ptr = eptr; /* For ONCE */ |
md->end_match_ptr = eptr; /* For ONCE */ |
| 6120 |
md->end_offset_top = offset_top; |
md->end_offset_top = offset_top; |
| 6121 |
return MATCH_MATCH; |
RRETURN(MATCH_MATCH); |
| 6122 |
} |
} |
| 6123 |
|
|
| 6124 |
/* In all other cases except a conditional group we have to check the |
/* In all other cases except a conditional group we have to check the |
| 6127 |
|
|
| 6128 |
if (*prev != OP_COND) |
if (*prev != OP_COND) |
| 6129 |
{ |
{ |
| 6130 |
int offset; |
number = *prev - OP_BRA; |
|
int number = *prev - OP_BRA; |
|
| 6131 |
|
|
| 6132 |
/* For extended extraction brackets (large number), we have to fish out |
/* For extended extraction brackets (large number), we have to fish out |
| 6133 |
the number from a dummy opcode at the start. */ |
the number from a dummy opcode at the start. */ |
| 6163 |
{ |
{ |
| 6164 |
recursion_info *rec = md->recursive; |
recursion_info *rec = md->recursive; |
| 6165 |
DPRINTF(("Recursion (%d) succeeded - continuing\n", number)); |
DPRINTF(("Recursion (%d) succeeded - continuing\n", number)); |
| 6166 |
md->recursive = rec->prev; |
md->recursive = rec->prevrec; |
| 6167 |
md->start_match = rec->save_start; |
md->start_match = rec->save_start; |
| 6168 |
memcpy(md->offset_vector, rec->offset_save, |
memcpy(md->offset_vector, rec->offset_save, |
| 6169 |
rec->saved_max * sizeof(int)); |
rec->saved_max * sizeof(int)); |
| 6197 |
|
|
| 6198 |
if (*ecode == OP_KETRMIN) |
if (*ecode == OP_KETRMIN) |
| 6199 |
{ |
{ |
| 6200 |
if ((rrc = match(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0); |
| 6201 |
0)) != MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6202 |
if ((rrc = match(eptr, prev, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup); |
| 6203 |
match_isgroup)) != MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6204 |
} |
} |
| 6205 |
else /* OP_KETRMAX */ |
else /* OP_KETRMAX */ |
| 6206 |
{ |
{ |
| 6207 |
if ((rrc = match(eptr, prev, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, prev, offset_top, md, ims, eptrb, match_isgroup); |
| 6208 |
match_isgroup)) != MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6209 |
if ((rrc = match(eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, |
RMATCH(rrc, eptr, ecode + 1+LINK_SIZE, offset_top, md, ims, eptrb, 0); |
| 6210 |
0)) != MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6211 |
} |
} |
| 6212 |
} |
} |
| 6213 |
return MATCH_NOMATCH; |
|
| 6214 |
|
RRETURN(MATCH_NOMATCH); |
| 6215 |
|
|
| 6216 |
/* Start of subject unless notbol, or after internal newline if multiline */ |
/* Start of subject unless notbol, or after internal newline if multiline */ |
| 6217 |
|
|
| 6218 |
case OP_CIRC: |
case OP_CIRC: |
| 6219 |
if (md->notbol && eptr == md->start_subject) return MATCH_NOMATCH; |
if (md->notbol && eptr == md->start_subject) RRETURN(MATCH_NOMATCH); |
| 6220 |
if ((ims & PCRE_MULTILINE) != 0) |
if ((ims & PCRE_MULTILINE) != 0) |
| 6221 |
{ |
{ |
| 6222 |
if (eptr != md->start_subject && eptr[-1] != NEWLINE) |
if (eptr != md->start_subject && eptr[-1] != NEWLINE) |
| 6223 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6224 |
ecode++; |
ecode++; |
| 6225 |
break; |
break; |
| 6226 |
} |
} |
| 6229 |
/* Start of subject assertion */ |
/* Start of subject assertion */ |
| 6230 |
|
|
| 6231 |
case OP_SOD: |
case OP_SOD: |
| 6232 |
if (eptr != md->start_subject) return MATCH_NOMATCH; |
if (eptr != md->start_subject) RRETURN(MATCH_NOMATCH); |
| 6233 |
ecode++; |
ecode++; |
| 6234 |
break; |
break; |
| 6235 |
|
|
| 6236 |
/* Start of match assertion */ |
/* Start of match assertion */ |
| 6237 |
|
|
| 6238 |
case OP_SOM: |
case OP_SOM: |
| 6239 |
if (eptr != md->start_subject + md->start_offset) return MATCH_NOMATCH; |
if (eptr != md->start_subject + md->start_offset) RRETURN(MATCH_NOMATCH); |
| 6240 |
ecode++; |
ecode++; |
| 6241 |
break; |
break; |
| 6242 |
|
|
| 6247 |
if ((ims & PCRE_MULTILINE) != 0) |
if ((ims & PCRE_MULTILINE) != 0) |
| 6248 |
{ |
{ |
| 6249 |
if (eptr < md->end_subject) |
if (eptr < md->end_subject) |
| 6250 |
{ if (*eptr != NEWLINE) return MATCH_NOMATCH; } |
{ if (*eptr != NEWLINE) RRETURN(MATCH_NOMATCH); } |
| 6251 |
else |
else |
| 6252 |
{ if (md->noteol) return MATCH_NOMATCH; } |
{ if (md->noteol) RRETURN(MATCH_NOMATCH); } |
| 6253 |
ecode++; |
ecode++; |
| 6254 |
break; |
break; |
| 6255 |
} |
} |
| 6256 |
else |
else |
| 6257 |
{ |
{ |
| 6258 |
if (md->noteol) return MATCH_NOMATCH; |
if (md->noteol) RRETURN(MATCH_NOMATCH); |
| 6259 |
if (!md->endonly) |
if (!md->endonly) |
| 6260 |
{ |
{ |
| 6261 |
if (eptr < md->end_subject - 1 || |
if (eptr < md->end_subject - 1 || |
| 6262 |
(eptr == md->end_subject - 1 && *eptr != NEWLINE)) |
(eptr == md->end_subject - 1 && *eptr != NEWLINE)) |
| 6263 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6264 |
ecode++; |
ecode++; |
| 6265 |
break; |
break; |
| 6266 |
} |
} |
| 6270 |
/* End of subject assertion (\z) */ |
/* End of subject assertion (\z) */ |
| 6271 |
|
|
| 6272 |
case OP_EOD: |
case OP_EOD: |
| 6273 |
if (eptr < md->end_subject) return MATCH_NOMATCH; |
if (eptr < md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6274 |
ecode++; |
ecode++; |
| 6275 |
break; |
break; |
| 6276 |
|
|
| 6278 |
|
|
| 6279 |
case OP_EODN: |
case OP_EODN: |
| 6280 |
if (eptr < md->end_subject - 1 || |
if (eptr < md->end_subject - 1 || |
| 6281 |
(eptr == md->end_subject - 1 && *eptr != NEWLINE)) return MATCH_NOMATCH; |
(eptr == md->end_subject - 1 && *eptr != NEWLINE)) RRETURN(MATCH_NOMATCH); |
| 6282 |
ecode++; |
ecode++; |
| 6283 |
break; |
break; |
| 6284 |
|
|
| 6287 |
case OP_NOT_WORD_BOUNDARY: |
case OP_NOT_WORD_BOUNDARY: |
| 6288 |
case OP_WORD_BOUNDARY: |
case OP_WORD_BOUNDARY: |
| 6289 |
{ |
{ |
|
BOOL prev_is_word, cur_is_word; |
|
| 6290 |
|
|
| 6291 |
/* Find out if the previous and current characters are "word" characters. |
/* Find out if the previous and current characters are "word" characters. |
| 6292 |
It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to |
It takes a bit more work in UTF-8 mode. Characters > 255 are assumed to |
| 6297 |
{ |
{ |
| 6298 |
if (eptr == md->start_subject) prev_is_word = FALSE; else |
if (eptr == md->start_subject) prev_is_word = FALSE; else |
| 6299 |
{ |
{ |
| 6300 |
const uschar *lastptr = eptr - 1; |
lastptr = eptr - 1; |
| 6301 |
while((*lastptr & 0xc0) == 0x80) lastptr--; |
while((*lastptr & 0xc0) == 0x80) lastptr--; |
| 6302 |
GETCHAR(c, lastptr); |
GETCHAR(c, lastptr); |
| 6303 |
prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; |
prev_is_word = c < 256 && (md->ctypes[c] & ctype_word) != 0; |
| 6324 |
|
|
| 6325 |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
if ((*ecode++ == OP_WORD_BOUNDARY)? |
| 6326 |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
cur_is_word == prev_is_word : cur_is_word != prev_is_word) |
| 6327 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6328 |
} |
} |
| 6329 |
break; |
break; |
| 6330 |
|
|
| 6332 |
|
|
| 6333 |
case OP_ANY: |
case OP_ANY: |
| 6334 |
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE) |
if ((ims & PCRE_DOTALL) == 0 && eptr < md->end_subject && *eptr == NEWLINE) |
| 6335 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6336 |
if (eptr++ >= md->end_subject) return MATCH_NOMATCH; |
if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6337 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6338 |
if (md->utf8) |
if (md->utf8) |
| 6339 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 6345 |
any byte, even newline, independent of the setting of PCRE_DOTALL. */ |
any byte, even newline, independent of the setting of PCRE_DOTALL. */ |
| 6346 |
|
|
| 6347 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 6348 |
if (eptr++ >= md->end_subject) return MATCH_NOMATCH; |
if (eptr++ >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6349 |
ecode++; |
ecode++; |
| 6350 |
break; |
break; |
| 6351 |
|
|
| 6352 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 6353 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6354 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 6355 |
if ( |
if ( |
| 6356 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6358 |
#endif |
#endif |
| 6359 |
(md->ctypes[c] & ctype_digit) != 0 |
(md->ctypes[c] & ctype_digit) != 0 |
| 6360 |
) |
) |
| 6361 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6362 |
ecode++; |
ecode++; |
| 6363 |
break; |
break; |
| 6364 |
|
|
| 6365 |
case OP_DIGIT: |
case OP_DIGIT: |
| 6366 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6367 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 6368 |
if ( |
if ( |
| 6369 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6371 |
#endif |
#endif |
| 6372 |
(md->ctypes[c] & ctype_digit) == 0 |
(md->ctypes[c] & ctype_digit) == 0 |
| 6373 |
) |
) |
| 6374 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6375 |
ecode++; |
ecode++; |
| 6376 |
break; |
break; |
| 6377 |
|
|
| 6378 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 6379 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6380 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 6381 |
if ( |
if ( |
| 6382 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6384 |
#endif |
#endif |
| 6385 |
(md->ctypes[c] & ctype_space) != 0 |
(md->ctypes[c] & ctype_space) != 0 |
| 6386 |
) |
) |
| 6387 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6388 |
ecode++; |
ecode++; |
| 6389 |
break; |
break; |
| 6390 |
|
|
| 6391 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
| 6392 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6393 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 6394 |
if ( |
if ( |
| 6395 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6397 |
#endif |
#endif |
| 6398 |
(md->ctypes[c] & ctype_space) == 0 |
(md->ctypes[c] & ctype_space) == 0 |
| 6399 |
) |
) |
| 6400 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6401 |
ecode++; |
ecode++; |
| 6402 |
break; |
break; |
| 6403 |
|
|
| 6404 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 6405 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6406 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 6407 |
if ( |
if ( |
| 6408 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6410 |
#endif |
#endif |
| 6411 |
(md->ctypes[c] & ctype_word) != 0 |
(md->ctypes[c] & ctype_word) != 0 |
| 6412 |
) |
) |
| 6413 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6414 |
ecode++; |
ecode++; |
| 6415 |
break; |
break; |
| 6416 |
|
|
| 6417 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 6418 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6419 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 6420 |
if ( |
if ( |
| 6421 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6423 |
#endif |
#endif |
| 6424 |
(md->ctypes[c] & ctype_word) == 0 |
(md->ctypes[c] & ctype_word) == 0 |
| 6425 |
) |
) |
| 6426 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6427 |
ecode++; |
ecode++; |
| 6428 |
break; |
break; |
| 6429 |
|
|
| 6437 |
|
|
| 6438 |
case OP_REF: |
case OP_REF: |
| 6439 |
{ |
{ |
| 6440 |
int length; |
offset = GET2(ecode, 1) << 1; /* Doubled ref number */ |
|
int offset = GET2(ecode, 1) << 1; /* Doubled ref number */ |
|
| 6441 |
ecode += 3; /* Advance past item */ |
ecode += 3; /* Advance past item */ |
| 6442 |
|
|
| 6443 |
/* If the reference is unset, set the length to be longer than the amount |
/* If the reference is unset, set the length to be longer than the amount |
| 6476 |
break; |
break; |
| 6477 |
|
|
| 6478 |
default: /* No repeat follows */ |
default: /* No repeat follows */ |
| 6479 |
if (!match_ref(offset, eptr, length, md, ims)) return MATCH_NOMATCH; |
if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH); |
| 6480 |
eptr += length; |
eptr += length; |
| 6481 |
continue; /* With the main loop */ |
continue; /* With the main loop */ |
| 6482 |
} |
} |
| 6492 |
|
|
| 6493 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 6494 |
{ |
{ |
| 6495 |
if (!match_ref(offset, eptr, length, md, ims)) return MATCH_NOMATCH; |
if (!match_ref(offset, eptr, length, md, ims)) RRETURN(MATCH_NOMATCH); |
| 6496 |
eptr += length; |
eptr += length; |
| 6497 |
} |
} |
| 6498 |
|
|
| 6505 |
|
|
| 6506 |
if (minimize) |
if (minimize) |
| 6507 |
{ |
{ |
| 6508 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 6509 |
{ |
{ |
| 6510 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6511 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6512 |
if (i >= max || !match_ref(offset, eptr, length, md, ims)) |
if (fi >= max || !match_ref(offset, eptr, length, md, ims)) |
| 6513 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6514 |
eptr += length; |
eptr += length; |
| 6515 |
} |
} |
| 6516 |
/* Control never gets here */ |
/* Control never gets here */ |
| 6520 |
|
|
| 6521 |
else |
else |
| 6522 |
{ |
{ |
| 6523 |
const uschar *pp = eptr; |
pp = eptr; |
| 6524 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 6525 |
{ |
{ |
| 6526 |
if (!match_ref(offset, eptr, length, md, ims)) break; |
if (!match_ref(offset, eptr, length, md, ims)) break; |
| 6528 |
} |
} |
| 6529 |
while (eptr >= pp) |
while (eptr >= pp) |
| 6530 |
{ |
{ |
| 6531 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6532 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6533 |
eptr -= length; |
eptr -= length; |
| 6534 |
} |
} |
| 6535 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6536 |
} |
} |
| 6537 |
} |
} |
| 6538 |
/* Control never gets here */ |
/* Control never gets here */ |
| 6551 |
case OP_NCLASS: |
case OP_NCLASS: |
| 6552 |
case OP_CLASS: |
case OP_CLASS: |
| 6553 |
{ |
{ |
| 6554 |
const uschar *data = ecode + 1; /* Save for matching */ |
data = ecode + 1; /* Save for matching */ |
| 6555 |
ecode += 33; /* Advance past the item */ |
ecode += 33; /* Advance past the item */ |
| 6556 |
|
|
| 6557 |
switch (*ecode) |
switch (*ecode) |
| 6591 |
{ |
{ |
| 6592 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 6593 |
{ |
{ |
| 6594 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6595 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
| 6596 |
if (c > 255) |
if (c > 255) |
| 6597 |
{ |
{ |
| 6598 |
if (op == OP_CLASS) return MATCH_NOMATCH; |
if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); |
| 6599 |
} |
} |
| 6600 |
else |
else |
| 6601 |
{ |
{ |
| 6602 |
if ((data[c/8] & (1 << (c&7))) == 0) return MATCH_NOMATCH; |
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
| 6603 |
} |
} |
| 6604 |
} |
} |
| 6605 |
} |
} |
| 6609 |
{ |
{ |
| 6610 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 6611 |
{ |
{ |
| 6612 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6613 |
c = *eptr++; |
c = *eptr++; |
| 6614 |
if ((data[c/8] & (1 << (c&7))) == 0) return MATCH_NOMATCH; |
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
| 6615 |
} |
} |
| 6616 |
} |
} |
| 6617 |
|
|
| 6629 |
/* UTF-8 mode */ |
/* UTF-8 mode */ |
| 6630 |
if (md->utf8) |
if (md->utf8) |
| 6631 |
{ |
{ |
| 6632 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 6633 |
{ |
{ |
| 6634 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6635 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6636 |
if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; |
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6637 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
| 6638 |
if (c > 255) |
if (c > 255) |
| 6639 |
{ |
{ |
| 6640 |
if (op == OP_CLASS) return MATCH_NOMATCH; |
if (op == OP_CLASS) RRETURN(MATCH_NOMATCH); |
| 6641 |
} |
} |
| 6642 |
else |
else |
| 6643 |
{ |
{ |
| 6644 |
if ((data[c/8] & (1 << (c&7))) == 0) return MATCH_NOMATCH; |
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
| 6645 |
} |
} |
| 6646 |
} |
} |
| 6647 |
} |
} |
| 6649 |
#endif |
#endif |
| 6650 |
/* Not UTF-8 mode */ |
/* Not UTF-8 mode */ |
| 6651 |
{ |
{ |
| 6652 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 6653 |
{ |
{ |
| 6654 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6655 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6656 |
if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; |
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6657 |
c = *eptr++; |
c = *eptr++; |
| 6658 |
if ((data[c/8] & (1 << (c&7))) == 0) return MATCH_NOMATCH; |
if ((data[c/8] & (1 << (c&7))) == 0) RRETURN(MATCH_NOMATCH); |
| 6659 |
} |
} |
| 6660 |
} |
} |
| 6661 |
/* Control never gets here */ |
/* Control never gets here */ |
| 6665 |
|
|
| 6666 |
else |
else |
| 6667 |
{ |
{ |
| 6668 |
const uschar *pp = eptr; |
pp = eptr; |
| 6669 |
|
|
| 6670 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6671 |
/* UTF-8 mode */ |
/* UTF-8 mode */ |
| 6688 |
} |
} |
| 6689 |
for (;;) |
for (;;) |
| 6690 |
{ |
{ |
| 6691 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6692 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6693 |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
| 6694 |
BACKCHAR(eptr); |
BACKCHAR(eptr); |
| 6695 |
} |
} |
| 6707 |
} |
} |
| 6708 |
while (eptr >= pp) |
while (eptr >= pp) |
| 6709 |
{ |
{ |
| 6710 |
if ((rrc = match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6711 |
MATCH_NOMATCH) return rrc; |
eptr--; |
| 6712 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6713 |
} |
} |
| 6714 |
} |
} |
| 6715 |
|
|
| 6716 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6717 |
} |
} |
| 6718 |
} |
} |
| 6719 |
/* Control never gets here */ |
/* Control never gets here */ |
| 6725 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6726 |
case OP_XCLASS: |
case OP_XCLASS: |
| 6727 |
{ |
{ |
| 6728 |
const uschar *data = ecode + 1 + LINK_SIZE; /* Save for matching */ |
data = ecode + 1 + LINK_SIZE; /* Save for matching */ |
| 6729 |
ecode += GET(ecode, 1); /* Advance past the item */ |
ecode += GET(ecode, 1); /* Advance past the item */ |
| 6730 |
|
|
| 6731 |
switch (*ecode) |
switch (*ecode) |
| 6761 |
|
|
| 6762 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 6763 |
{ |
{ |
| 6764 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6765 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
| 6766 |
if (!match_xclass(c, data)) return MATCH_NOMATCH; |
if (!match_xclass(c, data)) RRETURN(MATCH_NOMATCH); |
| 6767 |
} |
} |
| 6768 |
|
|
| 6769 |
/* If max == min we can continue with the main loop without the |
/* If max == min we can continue with the main loop without the |
| 6776 |
|
|
| 6777 |
if (minimize) |
if (minimize) |
| 6778 |
{ |
{ |
| 6779 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 6780 |
{ |
{ |
| 6781 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6782 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6783 |
if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; |
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 6784 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
| 6785 |
if (!match_xclass(c, data)) return MATCH_NOMATCH; |
if (!match_xclass(c, data)) RRETURN(MATCH_NOMATCH); |
| 6786 |
} |
} |
| 6787 |
/* Control never gets here */ |
/* Control never gets here */ |
| 6788 |
} |
} |
| 6791 |
|
|
| 6792 |
else |
else |
| 6793 |
{ |
{ |
| 6794 |
const uschar *pp = eptr; |
pp = eptr; |
| 6795 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 6796 |
{ |
{ |
| 6797 |
int len = 1; |
int len = 1; |
| 6802 |
} |
} |
| 6803 |
for(;;) |
for(;;) |
| 6804 |
{ |
{ |
| 6805 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6806 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6807 |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
| 6808 |
BACKCHAR(eptr) |
BACKCHAR(eptr) |
| 6809 |
} |
} |
| 6810 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6811 |
} |
} |
| 6812 |
|
|
| 6813 |
/* Control never gets here */ |
/* Control never gets here */ |
| 6818 |
|
|
| 6819 |
case OP_CHARS: |
case OP_CHARS: |
| 6820 |
{ |
{ |
| 6821 |
register int length = ecode[1]; |
register int slen = ecode[1]; |
| 6822 |
ecode += 2; |
ecode += 2; |
| 6823 |
|
|
| 6824 |
#ifdef DEBUG /* Sigh. Some compilers never learn. */ |
#ifdef DEBUG /* Sigh. Some compilers never learn. */ |
| 6827 |
else |
else |
| 6828 |
{ |
{ |
| 6829 |
printf("matching subject "); |
printf("matching subject "); |
| 6830 |
pchars(eptr, length, TRUE, md); |
pchars(eptr, slen, TRUE, md); |
| 6831 |
printf(" against pattern "); |
printf(" against pattern "); |
| 6832 |
} |
} |
| 6833 |
pchars(ecode, length, FALSE, md); |
pchars(ecode, slen, FALSE, md); |
| 6834 |
printf("\n"); |
printf("\n"); |
| 6835 |
#endif |
#endif |
| 6836 |
|
|
| 6837 |
if (length > md->end_subject - eptr) return MATCH_NOMATCH; |
if (slen > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); |
| 6838 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 6839 |
{ |
{ |
| 6840 |
while (length-- > 0) |
while (slen-- > 0) |
| 6841 |
if (md->lcc[*ecode++] != md->lcc[*eptr++]) |
if (md->lcc[*ecode++] != md->lcc[*eptr++]) |
| 6842 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6843 |
} |
} |
| 6844 |
else |
else |
| 6845 |
{ |
{ |
| 6846 |
while (length-- > 0) if (*ecode++ != *eptr++) return MATCH_NOMATCH; |
while (slen-- > 0) if (*ecode++ != *eptr++) RRETURN(MATCH_NOMATCH); |
| 6847 |
} |
} |
| 6848 |
} |
} |
| 6849 |
break; |
break; |
| 6883 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 6884 |
if (md->utf8) |
if (md->utf8) |
| 6885 |
{ |
{ |
| 6886 |
int len = 1; |
length = 1; |
| 6887 |
const uschar *charptr = ecode; |
charptr = ecode; |
| 6888 |
GETCHARLEN(c, ecode, len); |
GETCHARLEN(fc, ecode, length); |
| 6889 |
if (min * len > md->end_subject - eptr) return MATCH_NOMATCH; |
if (min * length > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); |
| 6890 |
ecode += len; |
ecode += length; |
| 6891 |
|
|
| 6892 |
/* Handle multibyte character matching specially here. There is no |
/* Handle multibyte character matching specially here. There is no |
| 6893 |
support for any kind of casing for multibyte characters. */ |
support for any kind of casing for multibyte characters. */ |
| 6894 |
|
|
| 6895 |
if (len > 1) |
if (length > 1) |
| 6896 |
{ |
{ |
| 6897 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 6898 |
{ |
{ |
| 6899 |
if (memcmp(eptr, charptr, len) != 0) return MATCH_NOMATCH; |
if (memcmp(eptr, charptr, length) != 0) RRETURN(MATCH_NOMATCH); |
| 6900 |
eptr += len; |
eptr += length; |
| 6901 |
} |
} |
| 6902 |
|
|
| 6903 |
if (min == max) continue; |
if (min == max) continue; |
| 6904 |
|
|
| 6905 |
if (minimize) |
if (minimize) |
| 6906 |
{ |
{ |
| 6907 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 6908 |
{ |
{ |
| 6909 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6910 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6911 |
if (i >= max || |
if (fi >= max || |
| 6912 |
eptr >= md->end_subject || |
eptr >= md->end_subject || |
| 6913 |
memcmp(eptr, charptr, len) != 0) |
memcmp(eptr, charptr, length) != 0) |
| 6914 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6915 |
eptr += len; |
eptr += length; |
| 6916 |
} |
} |
| 6917 |
/* Control never gets here */ |
/* Control never gets here */ |
| 6918 |
} |
} |
| 6919 |
else |
else |
| 6920 |
{ |
{ |
| 6921 |
const uschar *pp = eptr; |
pp = eptr; |
| 6922 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 6923 |
{ |
{ |
| 6924 |
if (eptr > md->end_subject - len || |
if (eptr > md->end_subject - length || |
| 6925 |
memcmp(eptr, charptr, len) != 0) |
memcmp(eptr, charptr, length) != 0) |
| 6926 |
break; |
break; |
| 6927 |
eptr += len; |
eptr += length; |
| 6928 |
} |
} |
| 6929 |
while (eptr >= pp) |
while (eptr >= pp) |
| 6930 |
{ |
{ |
| 6931 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6932 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6933 |
eptr -= len; |
eptr -= length; |
| 6934 |
} |
} |
| 6935 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6936 |
} |
} |
| 6937 |
/* Control never gets here */ |
/* Control never gets here */ |
| 6938 |
} |
} |
| 6939 |
|
|
| 6940 |
/* If the length of a UTF-8 character is 1, we fall through here, and |
/* If the length of a UTF-8 character is 1, we fall through here, and |
| 6941 |
obey the code as for non-UTF-8 characters below, though in this case the |
obey the code as for non-UTF-8 characters below, though in this case the |
| 6942 |
value of c will always be < 128. */ |
value of fc will always be < 128. */ |
| 6943 |
} |
} |
| 6944 |
else |
else |
| 6945 |
#endif |
#endif |
| 6946 |
|
|
| 6947 |
/* When not in UTF-8 mode, load a single-byte character. */ |
/* When not in UTF-8 mode, load a single-byte character. */ |
| 6948 |
{ |
{ |
| 6949 |
if (min > md->end_subject - eptr) return MATCH_NOMATCH; |
if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); |
| 6950 |
c = *ecode++; |
fc = *ecode++; |
| 6951 |
} |
} |
| 6952 |
|
|
| 6953 |
/* The value of c at this point is always less than 256, though we may or |
/* The value of fc at this point is always less than 256, though we may or |
| 6954 |
may not be in UTF-8 mode. The code is duplicated for the caseless and |
may not be in UTF-8 mode. The code is duplicated for the caseless and |
| 6955 |
caseful cases, for speed, since matching characters is likely to be quite |
caseful cases, for speed, since matching characters is likely to be quite |
| 6956 |
common. First, ensure the minimum number of matches are present. If min = |
common. First, ensure the minimum number of matches are present. If min = |
| 6959 |
matching character if failing, up to the maximum. Alternatively, if |
matching character if failing, up to the maximum. Alternatively, if |
| 6960 |
maximizing, find the maximum number of characters and work backwards. */ |
maximizing, find the maximum number of characters and work backwards. */ |
| 6961 |
|
|
| 6962 |
DPRINTF(("matching %c{%d,%d} against subject %.*s\n", c, min, max, |
DPRINTF(("matching %c{%d,%d} against subject %.*s\n", fc, min, max, |
| 6963 |
max, eptr)); |
max, eptr)); |
| 6964 |
|
|
| 6965 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 6966 |
{ |
{ |
| 6967 |
c = md->lcc[c]; |
fc = md->lcc[fc]; |
| 6968 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 6969 |
if (c != md->lcc[*eptr++]) return MATCH_NOMATCH; |
if (fc != md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); |
| 6970 |
if (min == max) continue; |
if (min == max) continue; |
| 6971 |
if (minimize) |
if (minimize) |
| 6972 |
{ |
{ |
| 6973 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 6974 |
{ |
{ |
| 6975 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6976 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6977 |
if (i >= max || eptr >= md->end_subject || |
if (fi >= max || eptr >= md->end_subject || |
| 6978 |
c != md->lcc[*eptr++]) |
fc != md->lcc[*eptr++]) |
| 6979 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 6980 |
} |
} |
| 6981 |
/* Control never gets here */ |
/* Control never gets here */ |
| 6982 |
} |
} |
| 6983 |
else |
else |
| 6984 |
{ |
{ |
| 6985 |
const uschar *pp = eptr; |
pp = eptr; |
| 6986 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 6987 |
{ |
{ |
| 6988 |
if (eptr >= md->end_subject || c != md->lcc[*eptr]) break; |
if (eptr >= md->end_subject || fc != md->lcc[*eptr]) break; |
| 6989 |
eptr++; |
eptr++; |
| 6990 |
} |
} |
| 6991 |
while (eptr >= pp) |
while (eptr >= pp) |
| 6992 |
if ((rrc = match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) != |
{ |
| 6993 |
MATCH_NOMATCH) return rrc; |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 6994 |
return MATCH_NOMATCH; |
eptr--; |
| 6995 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 6996 |
|
} |
| 6997 |
|
RRETURN(MATCH_NOMATCH); |
| 6998 |
} |
} |
| 6999 |
/* Control never gets here */ |
/* Control never gets here */ |
| 7000 |
} |
} |
| 7003 |
|
|
| 7004 |
else |
else |
| 7005 |
{ |
{ |
| 7006 |
for (i = 1; i <= min; i++) if (c != *eptr++) return MATCH_NOMATCH; |
for (i = 1; i <= min; i++) if (fc != *eptr++) RRETURN(MATCH_NOMATCH); |
| 7007 |
if (min == max) continue; |
if (min == max) continue; |
| 7008 |
if (minimize) |
if (minimize) |
| 7009 |
{ |
{ |
| 7010 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 7011 |
{ |
{ |
| 7012 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7013 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7014 |
if (i >= max || eptr >= md->end_subject || c != *eptr++) |
if (fi >= max || eptr >= md->end_subject || fc != *eptr++) |
| 7015 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7016 |
} |
} |
| 7017 |
/* Control never gets here */ |
/* Control never gets here */ |
| 7018 |
} |
} |
| 7019 |
else |
else |
| 7020 |
{ |
{ |
| 7021 |
const uschar *pp = eptr; |
pp = eptr; |
| 7022 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 7023 |
{ |
{ |
| 7024 |
if (eptr >= md->end_subject || c != *eptr) break; |
if (eptr >= md->end_subject || fc != *eptr) break; |
| 7025 |
eptr++; |
eptr++; |
| 7026 |
} |
} |
| 7027 |
while (eptr >= pp) |
while (eptr >= pp) |
| 7028 |
if ((rrc = match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) != |
{ |
| 7029 |
MATCH_NOMATCH) return rrc; |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7030 |
return MATCH_NOMATCH; |
eptr--; |
| 7031 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7032 |
|
} |
| 7033 |
|
RRETURN(MATCH_NOMATCH); |
| 7034 |
} |
} |
| 7035 |
} |
} |
| 7036 |
/* Control never gets here */ |
/* Control never gets here */ |
| 7039 |
checking can be multibyte. */ |
checking can be multibyte. */ |
| 7040 |
|
|
| 7041 |
case OP_NOT: |
case OP_NOT: |
| 7042 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 7043 |
ecode++; |
ecode++; |
| 7044 |
GETCHARINCTEST(c, eptr); |
GETCHARINCTEST(c, eptr); |
| 7045 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 7048 |
if (c < 256) |
if (c < 256) |
| 7049 |
#endif |
#endif |
| 7050 |
c = md->lcc[c]; |
c = md->lcc[c]; |
| 7051 |
if (md->lcc[*ecode++] == c) return MATCH_NOMATCH; |
if (md->lcc[*ecode++] == c) RRETURN(MATCH_NOMATCH); |
| 7052 |
} |
} |
| 7053 |
else |
else |
| 7054 |
{ |
{ |
| 7055 |
if (*ecode++ == c) return MATCH_NOMATCH; |
if (*ecode++ == c) RRETURN(MATCH_NOMATCH); |
| 7056 |
} |
} |
| 7057 |
break; |
break; |
| 7058 |
|
|
| 7093 |
characters left in the subject. */ |
characters left in the subject. */ |
| 7094 |
|
|
| 7095 |
REPEATNOTCHAR: |
REPEATNOTCHAR: |
| 7096 |
if (min > md->end_subject - eptr) return MATCH_NOMATCH; |
if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); |
| 7097 |
c = *ecode++; |
fc = *ecode++; |
| 7098 |
|
|
| 7099 |
/* The code is duplicated for the caseless and caseful cases, for speed, |
/* The code is duplicated for the caseless and caseful cases, for speed, |
| 7100 |
since matching characters is likely to be quite common. First, ensure the |
since matching characters is likely to be quite common. First, ensure the |
| 7104 |
maximum. Alternatively, if maximizing, find the maximum number of |
maximum. Alternatively, if maximizing, find the maximum number of |
| 7105 |
characters and work backwards. */ |
characters and work backwards. */ |
| 7106 |
|
|
| 7107 |
DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", c, min, max, |
DPRINTF(("negative matching %c{%d,%d} against subject %.*s\n", fc, min, max, |
| 7108 |
max, eptr)); |
max, eptr)); |
| 7109 |
|
|
| 7110 |
if ((ims & PCRE_CASELESS) != 0) |
if ((ims & PCRE_CASELESS) != 0) |
| 7111 |
{ |
{ |
| 7112 |
c = md->lcc[c]; |
fc = md->lcc[fc]; |
| 7113 |
|
|
| 7114 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 7115 |
/* UTF-8 mode */ |
/* UTF-8 mode */ |
| 7120 |
{ |
{ |
| 7121 |
GETCHARINC(d, eptr); |
GETCHARINC(d, eptr); |
| 7122 |
if (d < 256) d = md->lcc[d]; |
if (d < 256) d = md->lcc[d]; |
| 7123 |
if (c == d) return MATCH_NOMATCH; |
if (fc == d) RRETURN(MATCH_NOMATCH); |
| 7124 |
} |
} |
| 7125 |
} |
} |
| 7126 |
else |
else |
| 7129 |
/* Not UTF-8 mode */ |
/* Not UTF-8 mode */ |
| 7130 |
{ |
{ |
| 7131 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7132 |
if (c == md->lcc[*eptr++]) return MATCH_NOMATCH; |
if (fc == md->lcc[*eptr++]) RRETURN(MATCH_NOMATCH); |
| 7133 |
} |
} |
| 7134 |
|
|
| 7135 |
if (min == max) continue; |
if (min == max) continue; |
| 7141 |
if (md->utf8) |
if (md->utf8) |
| 7142 |
{ |
{ |
| 7143 |
register int d; |
register int d; |
| 7144 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 7145 |
{ |
{ |
| 7146 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7147 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7148 |
GETCHARINC(d, eptr); |
GETCHARINC(d, eptr); |
| 7149 |
if (d < 256) d = md->lcc[d]; |
if (d < 256) d = md->lcc[d]; |
| 7150 |
if (i >= max || eptr >= md->end_subject || c == d) |
if (fi >= max || eptr >= md->end_subject || fc == d) |
| 7151 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7152 |
} |
} |
| 7153 |
} |
} |
| 7154 |
else |
else |
| 7155 |
#endif |
#endif |
| 7156 |
/* Not UTF-8 mode */ |
/* Not UTF-8 mode */ |
| 7157 |
{ |
{ |
| 7158 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 7159 |
{ |
{ |
| 7160 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7161 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7162 |
if (i >= max || eptr >= md->end_subject || c == md->lcc[*eptr++]) |
if (fi >= max || eptr >= md->end_subject || fc == md->lcc[*eptr++]) |
| 7163 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7164 |
} |
} |
| 7165 |
} |
} |
| 7166 |
/* Control never gets here */ |
/* Control never gets here */ |
| 7170 |
|
|
| 7171 |
else |
else |
| 7172 |
{ |
{ |
| 7173 |
const uschar *pp = eptr; |
pp = eptr; |
| 7174 |
|
|
| 7175 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 7176 |
/* UTF-8 mode */ |
/* UTF-8 mode */ |
| 7183 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
| 7184 |
GETCHARLEN(d, eptr, len); |
GETCHARLEN(d, eptr, len); |
| 7185 |
if (d < 256) d = md->lcc[d]; |
if (d < 256) d = md->lcc[d]; |
| 7186 |
if (c == d) break; |
if (fc == d) break; |
| 7187 |
eptr += len; |
eptr += len; |
| 7188 |
} |
} |
| 7189 |
for(;;) |
for(;;) |
| 7190 |
{ |
{ |
| 7191 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7192 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7193 |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
| 7194 |
BACKCHAR(eptr); |
BACKCHAR(eptr); |
| 7195 |
} |
} |
| 7200 |
{ |
{ |
| 7201 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 7202 |
{ |
{ |
| 7203 |
if (eptr >= md->end_subject || c == md->lcc[*eptr]) break; |
if (eptr >= md->end_subject || fc == md->lcc[*eptr]) break; |
| 7204 |
eptr++; |
eptr++; |
| 7205 |
} |
} |
| 7206 |
while (eptr >= pp) |
while (eptr >= pp) |
| 7207 |
{ |
{ |
| 7208 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7209 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7210 |
eptr--; |
eptr--; |
| 7211 |
} |
} |
| 7212 |
} |
} |
| 7213 |
|
|
| 7214 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7215 |
} |
} |
| 7216 |
/* Control never gets here */ |
/* Control never gets here */ |
| 7217 |
} |
} |
| 7228 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7229 |
{ |
{ |
| 7230 |
GETCHARINC(d, eptr); |
GETCHARINC(d, eptr); |
| 7231 |
if (c == d) return MATCH_NOMATCH; |
if (fc == d) RRETURN(MATCH_NOMATCH); |
| 7232 |
} |
} |
| 7233 |
} |
} |
| 7234 |
else |
else |
| 7236 |
/* Not UTF-8 mode */ |
/* Not UTF-8 mode */ |
| 7237 |
{ |
{ |
| 7238 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7239 |
if (c == *eptr++) return MATCH_NOMATCH; |
if (fc == *eptr++) RRETURN(MATCH_NOMATCH); |
| 7240 |
} |
} |
| 7241 |
|
|
| 7242 |
if (min == max) continue; |
if (min == max) continue; |
| 7248 |
if (md->utf8) |
if (md->utf8) |
| 7249 |
{ |
{ |
| 7250 |
register int d; |
register int d; |
| 7251 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 7252 |
{ |
{ |
| 7253 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7254 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7255 |
GETCHARINC(d, eptr); |
GETCHARINC(d, eptr); |
| 7256 |
if (i >= max || eptr >= md->end_subject || c == d) |
if (fi >= max || eptr >= md->end_subject || fc == d) |
| 7257 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7258 |
} |
} |
| 7259 |
} |
} |
| 7260 |
else |
else |
| 7261 |
#endif |
#endif |
| 7262 |
/* Not UTF-8 mode */ |
/* Not UTF-8 mode */ |
| 7263 |
{ |
{ |
| 7264 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 7265 |
{ |
{ |
| 7266 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7267 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7268 |
if (i >= max || eptr >= md->end_subject || c == *eptr++) |
if (fi >= max || eptr >= md->end_subject || fc == *eptr++) |
| 7269 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7270 |
} |
} |
| 7271 |
} |
} |
| 7272 |
/* Control never gets here */ |
/* Control never gets here */ |
| 7276 |
|
|
| 7277 |
else |
else |
| 7278 |
{ |
{ |
| 7279 |
const uschar *pp = eptr; |
pp = eptr; |
| 7280 |
|
|
| 7281 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 7282 |
/* UTF-8 mode */ |
/* UTF-8 mode */ |
| 7288 |
int len = 1; |
int len = 1; |
| 7289 |
if (eptr >= md->end_subject) break; |
if (eptr >= md->end_subject) break; |
| 7290 |
GETCHARLEN(d, eptr, len); |
GETCHARLEN(d, eptr, len); |
| 7291 |
if (c == d) break; |
if (fc == d) break; |
| 7292 |
eptr += len; |
eptr += len; |
| 7293 |
} |
} |
| 7294 |
for(;;) |
for(;;) |
| 7295 |
{ |
{ |
| 7296 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7297 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7298 |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
| 7299 |
BACKCHAR(eptr); |
BACKCHAR(eptr); |
| 7300 |
} |
} |
| 7305 |
{ |
{ |
| 7306 |
for (i = min; i < max; i++) |
for (i = min; i < max; i++) |
| 7307 |
{ |
{ |
| 7308 |
if (eptr >= md->end_subject || c == *eptr) break; |
if (eptr >= md->end_subject || fc == *eptr) break; |
| 7309 |
eptr++; |
eptr++; |
| 7310 |
} |
} |
| 7311 |
while (eptr >= pp) |
while (eptr >= pp) |
| 7312 |
{ |
{ |
| 7313 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7314 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7315 |
eptr--; |
eptr--; |
| 7316 |
} |
} |
| 7317 |
} |
} |
| 7318 |
|
|
| 7319 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7320 |
} |
} |
| 7321 |
} |
} |
| 7322 |
/* Control never gets here */ |
/* Control never gets here */ |
| 7365 |
UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that |
UTF-8 mode, but it does no harm. Separate the UTF-8 code completely as that |
| 7366 |
is tidier. */ |
is tidier. */ |
| 7367 |
|
|
| 7368 |
if (min > md->end_subject - eptr) return MATCH_NOMATCH; |
if (min > md->end_subject - eptr) RRETURN(MATCH_NOMATCH); |
| 7369 |
if (min > 0) |
if (min > 0) |
| 7370 |
{ |
{ |
| 7371 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 7376 |
{ |
{ |
| 7377 |
if (eptr >= md->end_subject || |
if (eptr >= md->end_subject || |
| 7378 |
(*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0)) |
(*eptr++ == NEWLINE && (ims & PCRE_DOTALL) == 0)) |
| 7379 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7380 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 7381 |
} |
} |
| 7382 |
break; |
break; |
| 7388 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 7389 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7390 |
{ |
{ |
| 7391 |
if (eptr >= md->end_subject) return MATCH_NOMATCH; |
if (eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 7392 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
| 7393 |
if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) |
if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) |
| 7394 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7395 |
} |
} |
| 7396 |
break; |
break; |
| 7397 |
|
|
| 7400 |
{ |
{ |
| 7401 |
if (eptr >= md->end_subject || |
if (eptr >= md->end_subject || |
| 7402 |
*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) |
*eptr >= 128 || (md->ctypes[*eptr++] & ctype_digit) == 0) |
| 7403 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7404 |
/* No need to skip more bytes - we know it's a 1-byte character */ |
/* No need to skip more bytes - we know it's a 1-byte character */ |
| 7405 |
} |
} |
| 7406 |
break; |
break; |
| 7410 |
{ |
{ |
| 7411 |
if (eptr >= md->end_subject || |
if (eptr >= md->end_subject || |
| 7412 |
(*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0)) |
(*eptr < 128 && (md->ctypes[*eptr++] & ctype_space) != 0)) |
| 7413 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7414 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 7415 |
} |
} |
| 7416 |
break; |
break; |
| 7420 |
{ |
{ |
| 7421 |
if (eptr >= md->end_subject || |
if (eptr >= md->end_subject || |
| 7422 |
*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) |
*eptr >= 128 || (md->ctypes[*eptr++] & ctype_space) == 0) |
| 7423 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7424 |
/* No need to skip more bytes - we know it's a 1-byte character */ |
/* No need to skip more bytes - we know it's a 1-byte character */ |
| 7425 |
} |
} |
| 7426 |
break; |
break; |
| 7430 |
{ |
{ |
| 7431 |
if (eptr >= md->end_subject || |
if (eptr >= md->end_subject || |
| 7432 |
(*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0)) |
(*eptr < 128 && (md->ctypes[*eptr++] & ctype_word) != 0)) |
| 7433 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7434 |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
while (eptr < md->end_subject && (*eptr & 0xc0) == 0x80) eptr++; |
| 7435 |
} |
} |
| 7436 |
break; |
break; |
| 7440 |
{ |
{ |
| 7441 |
if (eptr >= md->end_subject || |
if (eptr >= md->end_subject || |
| 7442 |
*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) |
*eptr >= 128 || (md->ctypes[*eptr++] & ctype_word) == 0) |
| 7443 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7444 |
/* No need to skip more bytes - we know it's a 1-byte character */ |
/* No need to skip more bytes - we know it's a 1-byte character */ |
| 7445 |
} |
} |
| 7446 |
break; |
break; |
| 7456 |
if ((ims & PCRE_DOTALL) == 0) |
if ((ims & PCRE_DOTALL) == 0) |
| 7457 |
{ |
{ |
| 7458 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7459 |
if (*eptr++ == NEWLINE) return MATCH_NOMATCH; |
if (*eptr++ == NEWLINE) RRETURN(MATCH_NOMATCH); |
| 7460 |
} |
} |
| 7461 |
else eptr += min; |
else eptr += min; |
| 7462 |
break; |
break; |
| 7467 |
|
|
| 7468 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 7469 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7470 |
if ((md->ctypes[*eptr++] & ctype_digit) != 0) return MATCH_NOMATCH; |
if ((md->ctypes[*eptr++] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); |
| 7471 |
break; |
break; |
| 7472 |
|
|
| 7473 |
case OP_DIGIT: |
case OP_DIGIT: |
| 7474 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7475 |
if ((md->ctypes[*eptr++] & ctype_digit) == 0) return MATCH_NOMATCH; |
if ((md->ctypes[*eptr++] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); |
| 7476 |
break; |
break; |
| 7477 |
|
|
| 7478 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 7479 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7480 |
if ((md->ctypes[*eptr++] & ctype_space) != 0) return MATCH_NOMATCH; |
if ((md->ctypes[*eptr++] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); |
| 7481 |
break; |
break; |
| 7482 |
|
|
| 7483 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
| 7484 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7485 |
if ((md->ctypes[*eptr++] & ctype_space) == 0) return MATCH_NOMATCH; |
if ((md->ctypes[*eptr++] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); |
| 7486 |
break; |
break; |
| 7487 |
|
|
| 7488 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 7489 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7490 |
if ((md->ctypes[*eptr++] & ctype_word) != 0) |
if ((md->ctypes[*eptr++] & ctype_word) != 0) |
| 7491 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7492 |
break; |
break; |
| 7493 |
|
|
| 7494 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 7495 |
for (i = 1; i <= min; i++) |
for (i = 1; i <= min; i++) |
| 7496 |
if ((md->ctypes[*eptr++] & ctype_word) == 0) |
if ((md->ctypes[*eptr++] & ctype_word) == 0) |
| 7497 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7498 |
break; |
break; |
| 7499 |
} |
} |
| 7500 |
} |
} |
| 7512 |
/* UTF-8 mode */ |
/* UTF-8 mode */ |
| 7513 |
if (md->utf8) |
if (md->utf8) |
| 7514 |
{ |
{ |
| 7515 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 7516 |
{ |
{ |
| 7517 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7518 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7519 |
if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; |
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 7520 |
|
|
| 7521 |
GETCHARINC(c, eptr); |
GETCHARINC(c, eptr); |
| 7522 |
switch(ctype) |
switch(ctype) |
| 7523 |
{ |
{ |
| 7524 |
case OP_ANY: |
case OP_ANY: |
| 7525 |
if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) return MATCH_NOMATCH; |
if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH); |
| 7526 |
break; |
break; |
| 7527 |
|
|
| 7528 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 7530 |
|
|
| 7531 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 7532 |
if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) |
if (c < 256 && (md->ctypes[c] & ctype_digit) != 0) |
| 7533 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7534 |
break; |
break; |
| 7535 |
|
|
| 7536 |
case OP_DIGIT: |
case OP_DIGIT: |
| 7537 |
if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0) |
if (c >= 256 || (md->ctypes[c] & ctype_digit) == 0) |
| 7538 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7539 |
break; |
break; |
| 7540 |
|
|
| 7541 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 7542 |
if (c < 256 && (md->ctypes[c] & ctype_space) != 0) |
if (c < 256 && (md->ctypes[c] & ctype_space) != 0) |
| 7543 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7544 |
break; |
break; |
| 7545 |
|
|
| 7546 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
| 7547 |
if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) |
if (c >= 256 || (md->ctypes[c] & ctype_space) == 0) |
| 7548 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7549 |
break; |
break; |
| 7550 |
|
|
| 7551 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 7552 |
if (c < 256 && (md->ctypes[c] & ctype_word) != 0) |
if (c < 256 && (md->ctypes[c] & ctype_word) != 0) |
| 7553 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7554 |
break; |
break; |
| 7555 |
|
|
| 7556 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 7557 |
if (c >= 256 && (md->ctypes[c] & ctype_word) == 0) |
if (c >= 256 && (md->ctypes[c] & ctype_word) == 0) |
| 7558 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7559 |
break; |
break; |
| 7560 |
} |
} |
| 7561 |
} |
} |
| 7564 |
#endif |
#endif |
| 7565 |
/* Not UTF-8 mode */ |
/* Not UTF-8 mode */ |
| 7566 |
{ |
{ |
| 7567 |
for (i = min;; i++) |
for (fi = min;; fi++) |
| 7568 |
{ |
{ |
| 7569 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7570 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7571 |
if (i >= max || eptr >= md->end_subject) return MATCH_NOMATCH; |
if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); |
| 7572 |
c = *eptr++; |
c = *eptr++; |
| 7573 |
switch(ctype) |
switch(ctype) |
| 7574 |
{ |
{ |
| 7575 |
case OP_ANY: |
case OP_ANY: |
| 7576 |
if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) return MATCH_NOMATCH; |
if ((ims & PCRE_DOTALL) == 0 && c == NEWLINE) RRETURN(MATCH_NOMATCH); |
| 7577 |
break; |
break; |
| 7578 |
|
|
| 7579 |
case OP_ANYBYTE: |
case OP_ANYBYTE: |
| 7580 |
break; |
break; |
| 7581 |
|
|
| 7582 |
case OP_NOT_DIGIT: |
case OP_NOT_DIGIT: |
| 7583 |
if ((md->ctypes[c] & ctype_digit) != 0) return MATCH_NOMATCH; |
if ((md->ctypes[c] & ctype_digit) != 0) RRETURN(MATCH_NOMATCH); |
| 7584 |
break; |
break; |
| 7585 |
|
|
| 7586 |
case OP_DIGIT: |
case OP_DIGIT: |
| 7587 |
if ((md->ctypes[c] & ctype_digit) == 0) return MATCH_NOMATCH; |
if ((md->ctypes[c] & ctype_digit) == 0) RRETURN(MATCH_NOMATCH); |
| 7588 |
break; |
break; |
| 7589 |
|
|
| 7590 |
case OP_NOT_WHITESPACE: |
case OP_NOT_WHITESPACE: |
| 7591 |
if ((md->ctypes[c] & ctype_space) != 0) return MATCH_NOMATCH; |
if ((md->ctypes[c] & ctype_space) != 0) RRETURN(MATCH_NOMATCH); |
| 7592 |
break; |
break; |
| 7593 |
|
|
| 7594 |
case OP_WHITESPACE: |
case OP_WHITESPACE: |
| 7595 |
if ((md->ctypes[c] & ctype_space) == 0) return MATCH_NOMATCH; |
if ((md->ctypes[c] & ctype_space) == 0) RRETURN(MATCH_NOMATCH); |
| 7596 |
break; |
break; |
| 7597 |
|
|
| 7598 |
case OP_NOT_WORDCHAR: |
case OP_NOT_WORDCHAR: |
| 7599 |
if ((md->ctypes[c] & ctype_word) != 0) return MATCH_NOMATCH; |
if ((md->ctypes[c] & ctype_word) != 0) RRETURN(MATCH_NOMATCH); |
| 7600 |
break; |
break; |
| 7601 |
|
|
| 7602 |
case OP_WORDCHAR: |
case OP_WORDCHAR: |
| 7603 |
if ((md->ctypes[c] & ctype_word) == 0) return MATCH_NOMATCH; |
if ((md->ctypes[c] & ctype_word) == 0) RRETURN(MATCH_NOMATCH); |
| 7604 |
break; |
break; |
| 7605 |
} |
} |
| 7606 |
} |
} |
| 7614 |
|
|
| 7615 |
else |
else |
| 7616 |
{ |
{ |
| 7617 |
const uschar *pp = eptr; |
pp = eptr; |
| 7618 |
|
|
| 7619 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 7620 |
/* UTF-8 mode */ |
/* UTF-8 mode */ |
| 7751 |
|
|
| 7752 |
for(;;) |
for(;;) |
| 7753 |
{ |
{ |
| 7754 |
if ((rrc = match(eptr, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7755 |
MATCH_NOMATCH) return rrc; |
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7756 |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
if (eptr-- == pp) break; /* Stop if tried at original pos */ |
| 7757 |
BACKCHAR(eptr); |
BACKCHAR(eptr); |
| 7758 |
} |
} |
| 7841 |
|
|
| 7842 |
while (eptr >= pp) |
while (eptr >= pp) |
| 7843 |
{ |
{ |
| 7844 |
if ((rrc = match(eptr--, ecode, offset_top, md, ims, eptrb, 0)) != |
RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); |
| 7845 |
MATCH_NOMATCH) return rrc; |
eptr--; |
| 7846 |
|
if (rrc != MATCH_NOMATCH) RRETURN(rrc); |
| 7847 |
} |
} |
| 7848 |
} |
} |
| 7849 |
|
|
| 7850 |
/* Get here if we can't make it match with any permitted repetitions */ |
/* Get here if we can't make it match with any permitted repetitions */ |
| 7851 |
|
|
| 7852 |
return MATCH_NOMATCH; |
RRETURN(MATCH_NOMATCH); |
| 7853 |
} |
} |
| 7854 |
/* Control never gets here */ |
/* Control never gets here */ |
| 7855 |
|
|
| 7860 |
|
|
| 7861 |
default: |
default: |
| 7862 |
DPRINTF(("Unknown opcode %d\n", *ecode)); |
DPRINTF(("Unknown opcode %d\n", *ecode)); |
| 7863 |
return PCRE_ERROR_UNKNOWN_NODE; |
RRETURN(PCRE_ERROR_UNKNOWN_NODE); |
| 7864 |
} |
} |
| 7865 |
|
|
| 7866 |
/* Do not stick any code in here without much thought; it is assumed |
/* Do not stick any code in here without much thought; it is assumed |
| 7872 |
} |
} |
| 7873 |
|
|
| 7874 |
|
|
| 7875 |
|
/*************************************************************************** |
| 7876 |
|
**************************************************************************** |
| 7877 |
|
RECURSION IN THE match() FUNCTION |
| 7878 |
|
|
| 7879 |
|
Undefine all the macros that were defined above to handle this. */ |
| 7880 |
|
|
| 7881 |
|
#ifdef NO_RECURSE |
| 7882 |
|
#undef eptr |
| 7883 |
|
#undef ecode |
| 7884 |
|
#undef offset_top |
| 7885 |
|
#undef ims |
| 7886 |
|
#undef eptrb |
| 7887 |
|
#undef flags |
| 7888 |
|
|
| 7889 |
|
#undef callpat |
| 7890 |
|
#undef charptr |
| 7891 |
|
#undef data |
| 7892 |
|
#undef lastptr |
| 7893 |
|
#undef next |
| 7894 |
|
#undef pp |
| 7895 |
|
#undef prev |
| 7896 |
|
#undef saved_eptr |
| 7897 |
|
|
| 7898 |
|
#undef new_recursive |
| 7899 |
|
|
| 7900 |
|
#undef cur_is_word |
| 7901 |
|
#undef condition |
| 7902 |
|
#undef minimize |
| 7903 |
|
#undef prev_is_word |
| 7904 |
|
|
| 7905 |
|
#undef original_ims |
| 7906 |
|
|
| 7907 |
|
#undef ctype |
| 7908 |
|
#undef length |
| 7909 |
|
#undef max |
| 7910 |
|
#undef min |
| 7911 |
|
#undef number |
| 7912 |
|
#undef offset |
| 7913 |
|
#undef op |
| 7914 |
|
#undef save_capture_last |
| 7915 |
|
#undef save_offset1 |
| 7916 |
|
#undef save_offset2 |
| 7917 |
|
#undef save_offset3 |
| 7918 |
|
#undef stacksave |
| 7919 |
|
|
| 7920 |
|
#undef newptrb |
| 7921 |
|
|
| 7922 |
|
#endif |
| 7923 |
|
|
| 7924 |
|
/* These two are defined as macros in both cases */ |
| 7925 |
|
|
| 7926 |
|
#undef fc |
| 7927 |
|
#undef fi |
| 7928 |
|
|
| 7929 |
|
/*************************************************************************** |
| 7930 |
|
***************************************************************************/ |
| 7931 |
|
|
| 7932 |
|
|
| 7933 |
|
|
| 7934 |
/************************************************* |
/************************************************* |
| 7955 |
< -1 => some kind of unexpected problem |
< -1 => some kind of unexpected problem |
| 7956 |
*/ |
*/ |
| 7957 |
|
|
| 7958 |
int |
EXPORT int |
| 7959 |
pcre_exec(const pcre *external_re, const pcre_extra *extra_data, |
pcre_exec(const pcre *external_re, const pcre_extra *extra_data, |
| 7960 |
const char *subject, int length, int start_offset, int options, int *offsets, |
const char *subject, int length, int start_offset, int options, int *offsets, |
| 7961 |
int offsetcount) |
int offsetcount) |
| 8032 |
back the character offset. */ |
back the character offset. */ |
| 8033 |
|
|
| 8034 |
#ifdef SUPPORT_UTF8 |
#ifdef SUPPORT_UTF8 |
| 8035 |
if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 && |
if (match_block.utf8 && (options & PCRE_NO_UTF8_CHECK) == 0) |
| 8036 |
valid_utf8((uschar *)subject, length) >= 0) |
{ |
| 8037 |
return PCRE_ERROR_BADUTF8; |
if (valid_utf8((uschar *)subject, length) >= 0) |
| 8038 |
|
return PCRE_ERROR_BADUTF8; |
| 8039 |
|
if (start_offset > 0 && start_offset < length) |
| 8040 |
|
{ |
| 8041 |
|
int tb = ((uschar *)subject)[start_offset]; |
| 8042 |
|
if (tb > 127) |
| 8043 |
|
{ |
| 8044 |
|
tb &= 0xc0; |
| 8045 |
|
if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET; |
| 8046 |
|
} |
| 8047 |
|
} |
| 8048 |
|
} |
| 8049 |
#endif |
#endif |
| 8050 |
|
|
| 8051 |
/* The ims options can vary during the matching as a result of the presence |
/* The ims options can vary during the matching as a result of the presence |