--- code/trunk/pcretest.c 2007/03/19 10:20:14 126 +++ code/trunk/pcretest.c 2007/03/30 15:46:27 141 @@ -1972,6 +1972,7 @@ for (;; gmatched++) /* Loop for /g or /G */ { + int gany_fudge; if (timeitm > 0) { register int i; @@ -2255,25 +2256,39 @@ what Perl's /g options does. This turns out to be rather cunning. First we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the same point. If this fails (picked up above) we advance to the next - character. */ + character. + + Yet more complication arises in the case when the newline option is + "any" and a pattern in multiline mode has to match at the start of a + line. If a previous match was at the end of a line, and advance of one + character just passes the \r, whereas we should prefer the longer newline + sequence, as does the code in pcre_exec(). So we fudge it. */ g_notempty = 0; + gany_fudge = 0; + if (use_offsets[0] == use_offsets[1]) { if (use_offsets[0] == len) break; g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; + if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 && + (((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY && + use_offsets[0] < len - 1 && + bptr[use_offsets[0]] == '\r' && + bptr[use_offsets[0]+1] == '\n') + gany_fudge = 1; } /* For /g, update the start offset, leaving the rest alone */ - if (do_g) start_offset = use_offsets[1]; + if (do_g) start_offset = use_offsets[1] + gany_fudge; /* For /G, update the pointer and length */ else { - bptr += use_offsets[1]; - len -= use_offsets[1]; + bptr += use_offsets[1] + gany_fudge; + len -= use_offsets[1] + gany_fudge; } } /* End of loop for /g and /G */