| 7 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
| 8 |
|
|
| 9 |
Written by Philip Hazel |
Written by Philip Hazel |
| 10 |
Copyright (c) 1997-2005 University of Cambridge |
Copyright (c) 1997-2006 University of Cambridge |
| 11 |
|
|
| 12 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
| 13 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
| 114 |
|
|
| 115 |
typedef unsigned char uschar; |
typedef unsigned char uschar; |
| 116 |
|
|
| 117 |
|
/* When PCRE is compiled as a C++ library, the subject pointer can be replaced |
| 118 |
|
with a custom type. This makes it possible, for example, to allow pcre_exec() |
| 119 |
|
to process subject strings that are discontinuous by using a smart pointer |
| 120 |
|
class. It must always be possible to inspect all of the subject string in |
| 121 |
|
pcre_exec() because of the way it backtracks. Two macros are required in the |
| 122 |
|
normal case, for sign-unspecified and unsigned char pointers. The former is |
| 123 |
|
used for the external interface and appears in pcre.h, which is why its name |
| 124 |
|
must begin with PCRE_. */ |
| 125 |
|
|
| 126 |
|
#ifdef CUSTOM_SUBJECT_PTR |
| 127 |
|
#define PCRE_SPTR CUSTOM_SUBJECT_PTR |
| 128 |
|
#define USPTR CUSTOM_SUBJECT_PTR |
| 129 |
|
#else |
| 130 |
|
#define PCRE_SPTR const char * |
| 131 |
|
#define USPTR const unsigned char * |
| 132 |
|
#endif |
| 133 |
|
|
| 134 |
/* Include the public PCRE header and the definitions of UCP character property |
/* Include the public PCRE header and the definitions of UCP character property |
| 135 |
values. */ |
values. */ |
| 136 |
|
|
| 163 |
/* Replacement for memmove(), used via the memmove() macro defined just below.
This listing interleaves two revisions of the function: after each "| N |"
marker the first line is the older text and the second is the newer text.

Both pointers are first advanced by n, and the loop then copies n bytes one at
a time while stepping both pointers downwards, i.e. from the highest address
to the lowest. After n pre-decrements dest is back at the value it was passed
in with.

Arguments:
  dest    destination buffer
  src     source buffer
  n       number of bytes to copy

Returns:  the newer revision returns dest; the older revision has no return
          statement even though the function is declared to return void *.
*/

void *
void *
| 164 |
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n)
| 165 |
{
{
| 166 |
int i;       /* older revision: signed counter compared against size_t n */
size_t i;    /* newer revision: counter has the same type as n */
| 167 |
dest += n;
dest += n;
| 168 |
src += n;
src += n;
| 169 |
for (i = 0; i < n; ++i) *(--dest) = *(--src);
for (i = 0; i < n; ++i) *(--dest) = *(--src);
| 170 |

return dest;    /* present in the newer revision only */
| 171 |
}
}
| 172 |
#define memmove(a, b, c) pcre_memmove(a, b, c) |
#define memmove(a, b, c) pcre_memmove(a, b, c) |
| 173 |
#endif /* not HAVE_BCOPY */ |
#endif /* not HAVE_BCOPY */ |
| 461 |
#define ESC_tee '\t' |
#define ESC_tee '\t' |
| 462 |
#endif |
#endif |
| 463 |
|
|
| 464 |
|
/* Codes for different types of Unicode property */ |
| 465 |
|
|
| 466 |
|
#define PT_ANY 0 /* Any property - matches all chars */ |
| 467 |
|
#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ |
| 468 |
|
#define PT_GC 2 /* General characteristic (e.g. L) */ |
| 469 |
|
#define PT_PC 3 /* Particular characteristic (e.g. Lu) */ |
| 470 |
|
#define PT_SC 4 /* Script (e.g. Han) */ |
| 471 |
|
|
| 472 |
|
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that |
| 473 |
|
contain UTF-8 characters with values greater than 255. */ |
| 474 |
|
|
| 475 |
|
#define XCL_NOT 0x01 /* Flag: this is a negative class */ |
| 476 |
|
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ |
| 477 |
|
|
| 478 |
|
#define XCL_END 0 /* Marks end of individual items */ |
| 479 |
|
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ |
| 480 |
|
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ |
| 481 |
|
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ |
| 482 |
|
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ |
| 483 |
|
|
| 484 |
/* These are escaped items that aren't just an encoding of a particular data |
/* These are escaped items that aren't just an encoding of a particular data |
| 485 |
value such as \n. They must have non-zero values, as check_escape() returns |
value such as \n. They must have non-zero values, as check_escape() returns |
| 486 |
their negation. Also, they must appear in the same order as in the opcode |
their negation. Also, they must appear in the same order as in the opcode |
| 496 |
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E, |
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E, |
| 497 |
ESC_Q, ESC_REF }; |
ESC_Q, ESC_REF }; |
| 498 |
|
|
|
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that |
|
|
contain UTF-8 characters with values greater than 255. */ |
|
|
|
|
|
#define XCL_NOT 0x01 /* Flag: this is a negative class */ |
|
|
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ |
|
|
|
|
|
#define XCL_END 0 /* Marks end of individual items */ |
|
|
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ |
|
|
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ |
|
|
#define XCL_PROP 3 /* Unicode property (one property code) follows */ |
|
|
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ |
|
|
|
|
|
|
|
| 499 |
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets |
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets |
| 500 |
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to |
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to |
| 501 |
OP_EOD must correspond in order to the list of escapes immediately above. |
OP_EOD must correspond in order to the list of escapes immediately above. |
| 659 |
1, /* End */ \ |
1, /* End */ \ |
| 660 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ \
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */ \
| 661 |
1, 1, /* Any, Anybyte */ \ |
1, 1, /* Any, Anybyte */ \ |
| 662 |
2, 2, 1, /* NOTPROP, PROP, EXTUNI */ \ |
3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \ |
| 663 |
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \ |
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \ |
| 664 |
2, /* Char - the minimum length */ \ |
2, /* Char - the minimum length */ \ |
| 665 |
2, /* Charnc - the minimum length */ \ |
2, /* Charnc - the minimum length */ \ |
| 791 |
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ |
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ |
| 792 |
int group_num; /* Number of group that was called */ |
int group_num; /* Number of group that was called */ |
| 793 |
const uschar *after_call; /* "Return value": points after the call in the expr */ |
const uschar *after_call; /* "Return value": points after the call in the expr */ |
| 794 |
const uschar *save_start; /* Old value of md->start_match */ |
USPTR save_start; /* Old value of md->start_match */ |
| 795 |
int *offset_save; /* Pointer to start of saved offsets */ |
int *offset_save; /* Pointer to start of saved offsets */ |
| 796 |
int saved_max; /* Number of saved offsets */ |
int saved_max; /* Number of saved offsets */ |
| 797 |
} recursion_info; |
} recursion_info; |
| 810 |
doing traditional NFA matching, so that they are thread-safe. */ |
doing traditional NFA matching, so that they are thread-safe. */ |
| 811 |
|
|
| 812 |
typedef struct match_data { |
typedef struct match_data { |
| 813 |
unsigned long int match_call_count; /* As it says */ |
unsigned long int match_call_count; /* As it says */ |
| 814 |
unsigned long int match_limit;/* As it says */ |
unsigned long int match_limit; /* As it says */ |
| 815 |
|
unsigned long int match_limit_recursion; /* As it says */ |
| 816 |
int *offset_vector; /* Offset vector */ |
int *offset_vector; /* Offset vector */ |
| 817 |
int offset_end; /* One past the end */ |
int offset_end; /* One past the end */ |
| 818 |
int offset_max; /* The maximum usable for return data */ |
int offset_max; /* The maximum usable for return data */ |
| 827 |
BOOL partial; /* PARTIAL flag */ |
BOOL partial; /* PARTIAL flag */ |
| 828 |
BOOL hitend; /* Hit the end of the subject at some point */ |
BOOL hitend; /* Hit the end of the subject at some point */ |
| 829 |
const uschar *start_code; /* For use when recursing */ |
const uschar *start_code; /* For use when recursing */ |
| 830 |
const uschar *start_subject; /* Start of the subject string */ |
USPTR start_subject; /* Start of the subject string */ |
| 831 |
const uschar *end_subject; /* End of the subject string */ |
USPTR end_subject; /* End of the subject string */ |
| 832 |
const uschar *start_match; /* Start of this match attempt */ |
USPTR start_match; /* Start of this match attempt */ |
| 833 |
const uschar *end_match_ptr; /* Subject position at end match */ |
USPTR end_match_ptr; /* Subject position at end match */ |
| 834 |
int end_offset_top; /* Highwater mark at end of match */ |
int end_offset_top; /* Highwater mark at end of match */ |
| 835 |
int capture_last; /* Most recent capture number */ |
int capture_last; /* Most recent capture number */ |
| 836 |
int start_offset; /* The start offset value */ |
int start_offset; /* The start offset value */ |
| 885 |
#define ctypes_offset (cbits_offset + cbit_length) |
#define ctypes_offset (cbits_offset + cbit_length) |
| 886 |
#define tables_length (ctypes_offset + 256) |
#define tables_length (ctypes_offset + 256) |
| 887 |
|
|
| 888 |
/* Layout of the UCP type table that translates property names into codes for |
/* Layout of the UCP type table that translates property names into types and |
| 889 |
pcre_ucp_findchar(). */ |
codes. */ |
| 890 |
|
|
| 891 |
/* One entry of the table that maps a property name to its code(s). Two
revisions are interleaved here: the older one stores a single int per name,
the newer one stores a 16-bit type and a 16-bit value per name. */

typedef struct {
typedef struct {
| 892 |
const char *name;     /* Property name */
const char *name;     /* Property name */
| 893 |
int value;            /* Older revision: the single code for this name */
pcre_uint16 type;     /* Newer revision: property type; presumably one of the PT_ codes (confirm) */
| 894 |

pcre_uint16 value;    /* Newer revision: code for this name; presumably interpreted per type (confirm) */
| 895 |
} ucp_type_table;
} ucp_type_table;
| 896 |
|
|
| 897 |
|
|
| 922 |
extern int _pcre_ord2utf8(int, uschar *); |
extern int _pcre_ord2utf8(int, uschar *); |
| 923 |
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *, |
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *, |
| 924 |
const pcre_study_data *, pcre_study_data *); |
const pcre_study_data *, pcre_study_data *); |
| 925 |
extern int _pcre_ucp_findchar(const int, int *, int *); |
extern int _pcre_ucp_findprop(const int, int *, int *); |
| 926 |
|
extern int _pcre_ucp_othercase(const int); |
| 927 |
extern int _pcre_valid_utf8(const uschar *, int); |
extern int _pcre_valid_utf8(const uschar *, int); |
| 928 |
extern BOOL _pcre_xclass(int, const uschar *); |
extern BOOL _pcre_xclass(int, const uschar *); |
| 929 |
|
|