--- code/trunk/pcre_exec.c 2007/08/16 11:46:40 219 +++ code/trunk/pcre_exec.c 2007/09/11 12:57:06 236 @@ -43,7 +43,7 @@ possible. There are also some static supporting functions. */ #ifdef HAVE_CONFIG_H -#include <config.h> +#include "config.h" #endif #define NLBLOCK md /* Block containing newline information */ @@ -1526,12 +1526,16 @@ case 0x000d: if (eptr < md->end_subject && *eptr == 0x0a) eptr++; break; + case 0x000a: + break; + case 0x000b: case 0x000c: case 0x0085: case 0x2028: case 0x2029: + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); break; } ecode++; @@ -2954,12 +2958,16 @@ case 0x000d: if (eptr < md->end_subject && *eptr == 0x0a) eptr++; break; + case 0x000a: + break; + case 0x000b: case 0x000c: case 0x0085: case 0x2028: case 0x2029: + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); break; } } @@ -3172,9 +3180,12 @@ if (eptr < md->end_subject && *eptr == 0x0a) eptr++; break; case 0x000a: + break; + case 0x000b: case 0x000c: case 0x0085: + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); break; } } @@ -3426,11 +3437,14 @@ if (eptr < md->end_subject && *eptr == 0x0a) eptr++; break; case 0x000a: + break; + case 0x000b: case 0x000c: case 0x0085: case 0x2028: case 0x2029: + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); break; } break; @@ -3582,10 +3596,14 @@ case 0x000d: if (eptr < md->end_subject && *eptr == 0x0a) eptr++; break; + case 0x000a: + break; + case 0x000b: case 0x000c: case 0x0085: + if (md->bsr_anycrlf) RRETURN(MATCH_NOMATCH); break; } break; @@ -3883,8 +3901,10 @@ } else { - if (c != 0x000a && c != 0x000b && c != 0x000c && - c != 0x0085 && c != 0x2028 && c != 0x2029) + if (c != 0x000a && + (md->bsr_anycrlf || + (c != 0x000b && c != 0x000c && + c != 0x0085 && c != 0x2028 && c != 0x2029))) break; eptr += len; } @@ -4074,7 +4094,9 @@ } else { - if (c != 0x000a && c != 0x000b && c != 0x000c && c != 0x0085) + if (c != 0x000a && + (md->bsr_anycrlf || + (c != 0x000b && c != 0x000c && c != 0x0085))) break; eptr++; } @@ -4408,7 +4430,7 @@ /* Set up other data */ anchored = 
((re->options | options) & PCRE_ANCHORED) != 0; -startline = (re->options & PCRE_STARTLINE) != 0; +startline = (re->flags & PCRE_STARTLINE) != 0; firstline = (re->options & PCRE_FIRSTLINE) != 0; /* The code starts after the real_pcre block and the capture name table. */ @@ -4435,11 +4457,37 @@ md->lcc = tables + lcc_offset; md->ctypes = tables + ctypes_offset; +/* Handle different \R options. */ + +switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) + { + case 0: + if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0) + md->bsr_anycrlf = (re->options & PCRE_BSR_ANYCRLF) != 0; + else +#ifdef BSR_ANYCRLF + md->bsr_anycrlf = TRUE; +#else + md->bsr_anycrlf = FALSE; +#endif + break; + + case PCRE_BSR_ANYCRLF: + md->bsr_anycrlf = TRUE; + break; + + case PCRE_BSR_UNICODE: + md->bsr_anycrlf = FALSE; + break; + + default: return PCRE_ERROR_BADNEWLINE; + } + /* Handle different types of newline. The three bits give eight cases. If nothing is set at run time, whatever was used at compile time applies. */ -switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) & - PCRE_NEWLINE_BITS) +switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : + (pcre_uint32)options) & PCRE_NEWLINE_BITS) { case 0: newline = NEWLINE; break; /* Compile-time default */ case PCRE_NEWLINE_CR: newline = '\r'; break; @@ -4478,7 +4526,7 @@ /* Partial matching is supported only for a restricted set of regexes at the moment. */ -if (md->partial && (re->options & PCRE_NOPARTIAL) != 0) +if (md->partial && (re->flags & PCRE_NOPARTIAL) != 0) return PCRE_ERROR_BADPARTIAL; /* Check a UTF-8 string if required. 
Unfortunately there's no way of passing @@ -4555,7 +4603,7 @@ if (!anchored) { - if ((re->options & PCRE_FIRSTSET) != 0) + if ((re->flags & PCRE_FIRSTSET) != 0) { first_byte = re->first_byte & 255; if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE) @@ -4570,7 +4618,7 @@ /* For anchored or unanchored matches, there may be a "last known required character" set. */ -if ((re->options & PCRE_REQCHSET) != 0) +if ((re->flags & PCRE_REQCHSET) != 0) { req_byte = re->req_byte & 255; req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0; @@ -4785,16 +4833,17 @@ if (anchored || start_match > end_subject) break; - /* If we have just passed a CR and the newline option is CRLF or ANY or - ANYCRLF, and we are now at a LF, advance the match position by one more - character. */ + /* If we have just passed a CR and we are now at a LF, and the pattern does + not contain any explicit matches for \r or \n, and the newline option is CRLF + or ANY or ANYCRLF, advance the match position by one more character. */ if (start_match[-1] == '\r' && - (md->nltype == NLTYPE_ANY || - md->nltype == NLTYPE_ANYCRLF || - md->nllen == 2) && - start_match < end_subject && - *start_match == '\n') + start_match < end_subject && + *start_match == '\n' && + (re->flags & PCRE_HASCRORLF) == 0 && + (md->nltype == NLTYPE_ANY || + md->nltype == NLTYPE_ANYCRLF || + md->nllen == 2)) start_match++; } /* End of for(;;) "bumpalong" loop */