--- code/trunk/pcre_exec.c 2007/02/24 21:41:42 93 +++ code/trunk/pcre_exec.c 2007/04/02 13:32:07 144 @@ -6,7 +6,7 @@ and semantics are as close as possible to those of the Perl 5 language. Written by Philip Hazel - Copyright (c) 1997-2006 University of Cambridge + Copyright (c) 1997-2007 University of Cambridge ----------------------------------------------------------------------------- Redistribution and use in source and binary forms, with or without @@ -48,6 +48,11 @@ #include "pcre_internal.h" +/* Undefine some potentially clashing cpp symbols */ + +#undef min +#undef max + /* The chain of eptrblocks for tail recursions uses memory in stack workspace, obtained at top level, the size of which is defined by EPTR_WORK_SIZE. */ @@ -299,6 +304,8 @@ int Xprop_category; int Xprop_chartype; int Xprop_script; + int Xoclength; + uschar Xocchars[8]; #endif int Xctype; @@ -441,6 +448,8 @@ #define prop_category frame->Xprop_category #define prop_chartype frame->Xprop_chartype #define prop_script frame->Xprop_script +#define oclength frame->Xoclength +#define occhars frame->Xocchars #endif #define ctype frame->Xctype @@ -494,6 +503,8 @@ int prop_category; int prop_chartype; int prop_script; +int oclength; +uschar occhars[8]; #endif int ctype; @@ -2045,19 +2056,18 @@ if (length > 1) { - int oclength = 0; - uschar occhars[8]; - #ifdef SUPPORT_UCP unsigned int othercase; if ((ims & PCRE_CASELESS) != 0 && (othercase = _pcre_ucp_othercase(fc)) != NOTACHAR) oclength = _pcre_ord2utf8(othercase, occhars); + else oclength = 0; #endif /* SUPPORT_UCP */ for (i = 1; i <= min; i++) { if (memcmp(eptr, charptr, length) == 0) eptr += length; +#ifdef SUPPORT_UCP /* Need braces because of following else */ else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } else @@ -2065,6 +2075,9 @@ if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); eptr += oclength; } +#else /* without SUPPORT_UCP */ + else { RRETURN(MATCH_NOMATCH); } +#endif /* SUPPORT_UCP */ } if (min == max) continue; @@ -2077,6 +2090,7 @@ if (rrc != MATCH_NOMATCH) RRETURN(rrc); if (fi >= max || eptr >= md->end_subject) RRETURN(MATCH_NOMATCH); if (memcmp(eptr, charptr, length) == 0) eptr += length; +#ifdef SUPPORT_UCP /* Need braces because of following else */ else if (oclength == 0) { RRETURN(MATCH_NOMATCH); } else @@ -2084,6 +2098,9 @@ if (memcmp(eptr, occhars, oclength) != 0) RRETURN(MATCH_NOMATCH); eptr += oclength; } +#else /* without SUPPORT_UCP */ + else { RRETURN (MATCH_NOMATCH); } +#endif /* SUPPORT_UCP */ } /* Control never gets here */ } @@ -2095,22 +2112,31 @@ { if (eptr > md->end_subject - length) break; if (memcmp(eptr, charptr, length) == 0) eptr += length; +#ifdef SUPPORT_UCP else if (oclength == 0) break; else { if (memcmp(eptr, occhars, oclength) != 0) break; eptr += oclength; } +#else /* without SUPPORT_UCP */ + else break; +#endif /* SUPPORT_UCP */ } if (possessive) continue; - while (eptr >= pp) + for(;;) { RMATCH(rrc, eptr, ecode, offset_top, md, ims, eptrb, 0); if (rrc != MATCH_NOMATCH) RRETURN(rrc); + if (eptr == pp) RRETURN(MATCH_NOMATCH); +#ifdef SUPPORT_UCP + eptr--; + BACKCHAR(eptr); +#else /* without SUPPORT_UCP */ eptr -= length; +#endif /* SUPPORT_UCP */ } - RRETURN(MATCH_NOMATCH); } /* Control never gets here */ } @@ -3802,10 +3828,10 @@ md->lcc = tables + lcc_offset; md->ctypes = tables + ctypes_offset; -/* Handle different types of newline. The two bits give four cases. If nothing -is set at run time, whatever was used at compile time applies. */ +/* Handle different types of newline. The three bits give eight cases. If +nothing is set at run time, whatever was used at compile time applies. */ -switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : options) & +switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) & PCRE_NEWLINE_BITS) { case 0: newline = NEWLINE; break; /* Compile-time default */ @@ -3992,6 +4018,15 @@ { while (start_match <= end_subject && !WAS_NEWLINE(start_match)) start_match++; + + /* If we have just passed a CR and the newline option is ANY, and we are + now at a LF, advance the match position by one more character. */ + + if (start_match[-1] == '\r' && + md->nltype == NLTYPE_ANY && + start_match < end_subject && + *start_match == '\n') + start_match++; } }