| 1972 |
|
|
| 1973 |
for (;; gmatched++) /* Loop for /g or /G */ |
for (;; gmatched++) /* Loop for /g or /G */ |
| 1974 |
{ |
{ |
|
int gany_fudge; |
|
| 1975 |
if (timeitm > 0) |
if (timeitm > 0) |
| 1976 |
{ |
{ |
| 1977 |
register int i; |
register int i; |
| 2211 |
} |
} |
| 2212 |
|
|
| 2213 |
/* Failed to match. If this is a /g or /G loop and we previously set |
/* Failed to match. If this is a /g or /G loop and we previously set |
| 2214 |
g_notempty after a null match, this is not necessarily the end. |
g_notempty after a null match, this is not necessarily the end. We want |
| 2215 |
We want to advance the start offset, and continue. In the case of UTF-8 |
to advance the start offset, and continue. We won't be at the end of the |
| 2216 |
matching, the advance must be one character, not one byte. Fudge the |
string - that was checked before setting g_notempty. |
| 2217 |
offset values to achieve this. We won't be at the end of the string - |
|
| 2218 |
that was checked before setting g_notempty. */ |
Complication arises in the case when the newline option is "any". |
| 2219 |
|
If the previous match was at the end of a line terminated by CRLF, an |
| 2220 |
|
advance of one character just passes the \r, whereas we should prefer the |
| 2221 |
|
longer newline sequence, as does the code in pcre_exec(). Fudge the |
| 2222 |
|
offset value to achieve this. |
| 2223 |
|
|
| 2224 |
|
Otherwise, in the case of UTF-8 matching, the advance must be one |
| 2225 |
|
character, not one byte. */ |
| 2226 |
|
|
| 2227 |
else |
else |
| 2228 |
{ |
{ |
| 2230 |
{ |
{ |
| 2231 |
int onechar = 1; |
int onechar = 1; |
| 2232 |
use_offsets[0] = start_offset; |
use_offsets[0] = start_offset; |
| 2233 |
if (use_utf8) |
if ((((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == |
| 2234 |
|
PCRE_NEWLINE_ANY && |
| 2235 |
|
start_offset < len - 1 && |
| 2236 |
|
bptr[start_offset] == '\r' && |
| 2237 |
|
bptr[start_offset+1] == '\n') |
| 2238 |
|
onechar++; |
| 2239 |
|
else if (use_utf8) |
| 2240 |
{ |
{ |
| 2241 |
while (start_offset + onechar < len) |
while (start_offset + onechar < len) |
| 2242 |
{ |
{ |
| 2268 |
what Perl's /g option does. This turns out to be rather cunning. First |
what Perl's /g option does. This turns out to be rather cunning. First |
| 2269 |
we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the |
we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the |
| 2270 |
same point. If this fails (picked up above) we advance to the next |
same point. If this fails (picked up above) we advance to the next |
| 2271 |
character. |
character. */ |
|
|
|
|
Yet more complication arises in the case when the newline option is |
|
|
"any" and a pattern in multiline mode has to match at the start of a |
|
|
line. If a previous match was at the end of a line, an advance of one |
|
|
character just passes the \r, whereas we should prefer the longer newline |
|
|
sequence, as does the code in pcre_exec(). So we fudge it. */ |
|
| 2272 |
|
|
| 2273 |
g_notempty = 0; |
g_notempty = 0; |
|
gany_fudge = 0; |
|
| 2274 |
|
|
| 2275 |
if (use_offsets[0] == use_offsets[1]) |
if (use_offsets[0] == use_offsets[1]) |
| 2276 |
{ |
{ |
| 2277 |
if (use_offsets[0] == len) break; |
if (use_offsets[0] == len) break; |
| 2278 |
g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; |
g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED; |
|
if ((((real_pcre *)re)->options & PCRE_STARTLINE) != 0 && |
|
|
(((real_pcre *)re)->options & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY && |
|
|
use_offsets[0] < len - 1 && |
|
|
bptr[use_offsets[0]] == '\r' && |
|
|
bptr[use_offsets[0]+1] == '\n') |
|
|
gany_fudge = 1; |
|
| 2279 |
} |
} |
| 2280 |
|
|
| 2281 |
/* For /g, update the start offset, leaving the rest alone */ |
/* For /g, update the start offset, leaving the rest alone */ |
| 2282 |
|
|
| 2283 |
if (do_g) start_offset = use_offsets[1] + gany_fudge; |
if (do_g) start_offset = use_offsets[1]; |
| 2284 |
|
|
| 2285 |
/* For /G, update the pointer and length */ |
/* For /G, update the pointer and length */ |
| 2286 |
|
|
| 2287 |
else |
else |
| 2288 |
{ |
{ |
| 2289 |
bptr += use_offsets[1] + gany_fudge; |
bptr += use_offsets[1]; |
| 2290 |
len -= use_offsets[1] + gany_fudge; |
len -= use_offsets[1]; |
| 2291 |
} |
} |
| 2292 |
} /* End of loop for /g and /G */ |
} /* End of loop for /g and /G */ |
| 2293 |
|
|