--- code/trunk/pcre_internal.h 2010/05/05 10:44:20 517 +++ code/trunk/pcre_internal.h 2010/05/18 15:47:01 518 @@ -580,7 +580,7 @@ PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK|PCRE_AUTO_CALLOUT|PCRE_FIRSTLINE| \ PCRE_DUPNAMES|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \ - PCRE_JAVASCRIPT_COMPAT) + PCRE_JAVASCRIPT_COMPAT|PCRE_UCP) #define PUBLIC_EXEC_OPTIONS \ (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \ @@ -905,6 +905,7 @@ #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)" #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)" #define STRING_UTF8_RIGHTPAR "UTF8)" +#define STRING_UCP_RIGHTPAR "UCP)" #else /* SUPPORT_UTF8 */ @@ -1158,6 +1159,7 @@ #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS #define STRING_UTF8_RIGHTPAR STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS +#define STRING_UCP_RIGHTPAR STR_U STR_C STR_P STR_RIGHT_PARENTHESIS #endif /* SUPPORT_UTF8 */ @@ -1218,6 +1220,11 @@ used for [^] in JavaScript compatibility mode. In non-DOTALL mode, "." behaves like \N. +The special values ESC_DU, ESC_du, etc. are used instead of ESC_D, ESC_d, etc. +when PCRE_UCP is set, when replacement of \d etc by \p sequences is required. +They must be contiguous, and remain in order so that the replacements can be +looked up from a table. + The final escape must be ESC_REF as subsequent values are used for backreferences (\1, \2, \3, etc). There are two tests in the code for an escape greater than ESC_b and less than ESC_Z to detect the types that may be @@ -1227,10 +1234,11 @@ enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_N, ESC_dum, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, - ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_g, ESC_k, + ESC_h, ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, + ESC_E, ESC_Q, ESC_g, ESC_k, + ESC_DU, ESC_du, ESC_SU, ESC_su, ESC_WU, ESC_wu, ESC_REF }; - /* Opcode table: Starting from 1 (i.e. after OP_END), the values up to OP_EOD must correspond in order to the list of escapes immediately above. @@ -1525,7 +1533,7 @@ ERR30, ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49, ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59, - ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERRCOUNT }; + ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERRCOUNT }; /* The real format of the start of the pcre block; the index of names and the code vector run on as long as necessary after the end. We store an explicit @@ -1668,6 +1676,7 @@ BOOL noteol; /* NOTEOL flag */ BOOL utf8; /* UTF8 flag */ BOOL jscript_compat; /* JAVASCRIPT_COMPAT flag */ + BOOL use_ucp; /* PCRE_UCP flag */ BOOL endonly; /* Dollar not before final \n */ BOOL notempty; /* Empty string match not wanted */ BOOL notempty_atstart; /* Empty string match at start not wanted */